[llvm] [AArch64] Update getCmpSelInstrCost (PR #145568)

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 11:29:37 PDT 2025


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/145568

This currently
  - The AArch64TTIImpl::getCmpSelInstrCost functoin now tried to handle all cost kinds, not just TCK_RecipThroughput.
  - Updates fcmp costs to correctly model the cost of fcmp's predicates that require expansion into multiple instructions.
  - Adds a target feature for when SVE fcmxx instructions have a lower throughput than the Neon versions.
  - Some of the old select code that assumed they would not be well lowered were removed.

>From 3daea677ca4c360fdf193945bf84ed60a1d6101c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 24 Jun 2025 19:15:54 +0100
Subject: [PATCH] [AArch64] Update getCmpSelInstrCost

This currently
  - The AArch64TTIImpl::getCmpSelInstrCost functoin now tried to handle all
    cost kinds, not just TCK_RecipThroughput.
  - Updates fcmp costs to correctly model the cost of fcmp's predicates that
    require expansion into multiple instructions.
  - Adds a target feature for when SVE fcmxx instructions have a lower
    throughput than the Neon versions.
  - Some of the old select code that assumed they would not be well lowered
    were removed.
---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |    1 -
 llvm/lib/Target/AArch64/AArch64Features.td    |    4 +
 llvm/lib/Target/AArch64/AArch64Processors.td  |   14 +
 .../AArch64/AArch64TargetTransformInfo.cpp    |  147 +--
 llvm/test/Analysis/CostModel/AArch64/cmp.ll   |   38 +-
 llvm/test/Analysis/CostModel/AArch64/fcmp.ll  | 1076 +++++++++++++++++
 .../test/Analysis/CostModel/AArch64/select.ll |   22 +-
 .../Analysis/CostModel/AArch64/sve-cmpsel.ll  |   10 +-
 .../CostModel/AArch64/vector-select.ll        |  200 +--
 .../LoopVectorize/AArch64/masked-call.ll      |   27 +-
 .../SLPVectorizer/scalarazied-result.ll       |    4 -
 11 files changed, 1301 insertions(+), 242 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/fcmp.ll

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index c22928c9bcd94..b7fef9fba809d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1508,7 +1508,6 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       const auto Op1Info = TTI::getOperandInfo(Operands[0]);
       const auto Op2Info = TTI::getOperandInfo(Operands[1]);
       Type *ValTy = Operands[0]->getType();
-      // TODO: Also handle ICmp/FCmp constant expressions.
       return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                            I ? cast<CmpInst>(I)->getPredicate()
                                              : CmpInst::BAD_ICMP_PREDICATE,
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 469c76752c78c..bebd2e1d3f0b9 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -825,6 +825,10 @@ def FeatureDisableFastIncVL : SubtargetFeature<"disable-fast-inc-vl",
                                                "HasDisableFastIncVL", "true",
                                                "Do not prefer INC/DEC, ALL, { 1, 2, 4 } over ADDVL">;
 
+// SVE fcmxx operations sometimes have half the throughput of Neon cmxx.
+def FeatureHalfSVEFCmpBandwidth : SubtargetFeature<"half-sve-fcmp-bandwidth", "HasHalfSVEFCmpBandwidth", "false",
+  "SVE FCmp instructions have half the throughput of Neon instructions">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index e1b82953aad80..5a32ccc1720ce 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -21,6 +21,7 @@ def TuneA320 : SubtargetFeature<"a320", "ARMProcFamily", "CortexA320",
                                    "Cortex-A320 ARM processors", [
                                    FeatureFuseAES,
                                    FeatureFuseAdrpAdd,
+                                   FeatureHalfSVEFCmpBandwidth,
                                    FeaturePostRAScheduler]>;
 
 def TuneA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
@@ -41,6 +42,7 @@ def TuneA510    : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
                                    "Cortex-A510 ARM processors", [
                                    FeatureFuseAES,
                                    FeatureFuseAdrpAdd,
+                                   FeatureHalfSVEFCmpBandwidth,
                                    FeaturePostRAScheduler
                                    ]>;
 
@@ -48,12 +50,14 @@ def TuneA520    : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
                                    "Cortex-A520 ARM processors", [
                                    FeatureFuseAES,
                                    FeatureFuseAdrpAdd,
+                                   FeatureHalfSVEFCmpBandwidth,
                                    FeaturePostRAScheduler]>;
 
 def TuneA520AE  : SubtargetFeature<"a520ae", "ARMProcFamily", "CortexA520",
                                    "Cortex-A520AE ARM processors", [
                                    FeatureFuseAES,
                                    FeatureFuseAdrpAdd,
+                                   FeatureHalfSVEFCmpBandwidth,
                                    FeaturePostRAScheduler]>;
 
 def TuneA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
@@ -173,6 +177,7 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
                                  FeatureALULSLFast,
                                  FeatureFuseAdrpAdd,
                                  FeatureEnableSelectOptimize,
+                                 FeatureHalfSVEFCmpBandwidth,
                                  FeaturePredictableSelectIsExpensive]>;
 
 def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
@@ -235,6 +240,7 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
                                   FeatureALULSLFast,
                                   FeaturePostRAScheduler,
                                   FeatureEnableSelectOptimize,
+                                  FeatureHalfSVEFCmpBandwidth,
                                   FeatureUseFixedOverScalableIfEqualCost,
                                   FeaturePredictableSelectIsExpensive]>;
 
@@ -245,6 +251,7 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
                                FeatureFuseAES,
                                FeaturePostRAScheduler,
                                FeatureEnableSelectOptimize,
+                               FeatureHalfSVEFCmpBandwidth,
                                FeatureUseFixedOverScalableIfEqualCost,
                                FeatureAvoidLDAPUR,
                                FeaturePredictableSelectIsExpensive]>;
@@ -257,6 +264,7 @@ def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
                                FeaturePostRAScheduler,
                                FeatureEnableSelectOptimize,
                                FeatureUseFixedOverScalableIfEqualCost,
+                               FeatureHalfSVEFCmpBandwidth,
                                FeatureAvoidLDAPUR,
                                FeaturePredictableSelectIsExpensive]>;
 
@@ -268,6 +276,7 @@ def TuneX925 : SubtargetFeature<"cortex-x925", "ARMProcFamily",
                                 FeaturePostRAScheduler,
                                 FeatureEnableSelectOptimize,
                                 FeatureUseFixedOverScalableIfEqualCost,
+                                FeatureHalfSVEFCmpBandwidth,
                                 FeatureAvoidLDAPUR,
                                 FeaturePredictableSelectIsExpensive]>;
 
@@ -520,6 +529,7 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
                                       FeatureALULSLFast,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
+                                      FeatureHalfSVEFCmpBandwidth,
                                       FeaturePredictableSelectIsExpensive]>;
 
 def TuneNeoverseN3 : SubtargetFeature<"neoversen3", "ARMProcFamily", "NeoverseN3",
@@ -549,6 +559,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
                                       FeaturePredictableSelectIsExpensive,
+                                      FeatureHalfSVEFCmpBandwidth,
                                       FeatureNoSVEFPLD1R]>;
 
 def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
@@ -560,6 +571,7 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
                                       FeatureUseFixedOverScalableIfEqualCost,
+                                      FeatureHalfSVEFCmpBandwidth,
                                       FeatureAvoidLDAPUR,
                                       FeaturePredictableSelectIsExpensive,
                                       FeatureDisableLatencySchedHeuristic]>;
@@ -571,6 +583,7 @@ def TuneNeoverseV3 : SubtargetFeature<"neoversev3", "ARMProcFamily", "NeoverseV3
                                       FeatureFuseAdrpAdd,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
+                                      FeatureHalfSVEFCmpBandwidth,
                                       FeatureAvoidLDAPUR,
                                       FeaturePredictableSelectIsExpensive]>;
 
@@ -581,6 +594,7 @@ def TuneNeoverseV3AE : SubtargetFeature<"neoversev3AE", "ARMProcFamily", "Neover
                                       FeatureFuseAdrpAdd,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
+                                      FeatureHalfSVEFCmpBandwidth,
                                       FeatureAvoidLDAPUR,
                                       FeaturePredictableSelectIsExpensive]>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 15e38e6cb2408..916b4bfde0022 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4264,99 +4264,78 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
     unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
     TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
     TTI::OperandValueInfo Op2Info, const Instruction *I) const {
-  // TODO: Handle other cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
-                                     Op1Info, Op2Info, I);
-
-  int ISD = TLI->InstructionOpcodeToISD(Opcode);
-  // We don't lower some vector selects well that are wider than the register
-  // width.
-  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
-    // We would need this many instructions to hide the scalarization happening.
-    const int AmortizationCost = 20;
-
-    // If VecPred is not set, check if we can get a predicate from the context
-    // instruction, if its type matches the requested ValTy.
-    if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
-      CmpPredicate CurrentPred;
-      if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
-                            m_Value())))
-        VecPred = CurrentPred;
-    }
-    // Check if we have a compare/select chain that can be lowered using
-    // a (F)CMxx & BFI pair.
-    if (CmpInst::isIntPredicate(VecPred) || VecPred == CmpInst::FCMP_OLE ||
-        VecPred == CmpInst::FCMP_OLT || VecPred == CmpInst::FCMP_OGT ||
-        VecPred == CmpInst::FCMP_OGE || VecPred == CmpInst::FCMP_OEQ ||
-        VecPred == CmpInst::FCMP_UNE) {
-      static const auto ValidMinMaxTys = {
-          MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
-          MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
-      static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
-
-      auto LT = getTypeLegalizationCost(ValTy);
-      if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
-          (ST->hasFullFP16() &&
-           any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
-        return LT.first;
-    }
-
-    static const TypeConversionCostTblEntry
-    VectorSelectTbl[] = {
-      { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
-      { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
-      { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
-      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
-    };
-
-    EVT SelCondTy = TLI->getValueType(DL, CondTy);
-    EVT SelValTy = TLI->getValueType(DL, ValTy);
-    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
-      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
-                                                     SelCondTy.getSimpleVT(),
-                                                     SelValTy.getSimpleVT()))
-        return Entry->Cost;
-    }
-  }
-
-  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
-    Type *ValScalarTy = ValTy->getScalarType();
-    if ((ValScalarTy->isHalfTy() && !ST->hasFullFP16()) ||
-        ValScalarTy->isBFloatTy()) {
-      auto *ValVTy = cast<FixedVectorType>(ValTy);
-
-      // Without dedicated instructions we promote [b]f16 compares to f32.
-      auto *PromotedTy =
-          VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);
-
-      InstructionCost Cost = 0;
-      // Promote operands to float vectors.
-      Cost += 2 * getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
-                                   TTI::CastContextHint::None, CostKind);
-      // Compare float vectors.
+  if (Opcode == Instruction::FCmp) {
+    // Without dedicated instructions we promote f16 + bf16 compares to f32.
+    if ((!ST->hasFullFP16() && ValTy->getScalarType()->isHalfTy()) ||
+        ValTy->getScalarType()->isBFloatTy()) {
+      Type *PromotedTy =
+          ValTy->getWithNewType(Type::getFloatTy(ValTy->getContext()));
+      InstructionCost Cost =
+          getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
+                           TTI::CastContextHint::None, CostKind);
+      if (!Op1Info.isConstant() && !Op2Info.isConstant())
+        Cost *= 2;
       Cost += getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind,
                                  Op1Info, Op2Info);
-      // During codegen we'll truncate the vector result from i32 to i16.
-      Cost +=
-          getCastInstrCost(Instruction::Trunc, VectorType::getInteger(ValVTy),
-                           VectorType::getInteger(PromotedTy),
-                           TTI::CastContextHint::None, CostKind);
+      if (ValTy->isVectorTy())
+        Cost += getCastInstrCost(
+            Instruction::Trunc, VectorType::getInteger(cast<VectorType>(ValTy)),
+            VectorType::getInteger(cast<VectorType>(PromotedTy)),
+            TTI::CastContextHint::None, CostKind);
       return Cost;
     }
+
+    auto LT = getTypeLegalizationCost(ValTy);
+    // Model unknown fp compares as a libcall.
+    if (LT.second.getScalarType() != MVT::f64 &&
+        LT.second.getScalarType() != MVT::f32 &&
+        LT.second.getScalarType() != MVT::f16)
+      return LT.first * getCallInstrCost(/*Function*/ nullptr, ValTy,
+                                         {ValTy, ValTy}, CostKind);
+
+    // Some comparison operators require expanding to multiple compares + or.
+    unsigned Factor = 1;
+    if (!CondTy->isVectorTy() &&
+        (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+      Factor = 2; // fcmp with 2 selects
+    else if (isa<FixedVectorType>(ValTy) &&
+             (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ ||
+              VecPred == FCmpInst::FCMP_ORD))
+      Factor = 3; // fcmxx+fcmyy+or
+    else if (isa<ScalableVectorType>(ValTy) &&
+             (VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
+      Factor = 3; // fcmxx+fcmyy+or
+
+    if (isa<ScalableVectorType>(ValTy) &&
+        CostKind == TTI::TCK_RecipThroughput && ST->hasHalfSVEFCmpBandwidth())
+      Factor *= 2;
+
+    return Factor * (CostKind == TTI::TCK_Latency ? 2 : LT.first);
+  }
+
+  // We don't lower some vector selects well that are wider than the register
+  // width.
+  if (isa<FixedVectorType>(ValTy) && Opcode == Instruction::Select) {
+    // Check if we have a compare/select chain that can be lowered using
+    // a (F)CMxx & BFI pair. This assumes that the select can use a BIC/BIF/BSL
+    // so has a cost of 1, and the the comparison typesize matches the select
+    // size and no extends/truncates are required.
+    static const auto ValidMinMaxTys = {
+        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
+    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
+
+    auto LT = getTypeLegalizationCost(ValTy);
+    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
+        (ST->hasFullFP16() &&
+         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
+      return LT.first;
   }
 
   // Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
   // icmp(and, 0) as free, as we can make use of ands, but only if the
   // comparison is not unsigned.
-  if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I &&
+  if (ValTy->isIntegerTy() && Opcode == Instruction::ICmp && I &&
       !CmpInst::isUnsigned(VecPred) &&
       TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
       match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
index 16b3913f52028..32b12e9c52c39 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
@@ -13,14 +13,14 @@ define void @cmps() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv16i8 = icmp slt <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv8i16 = icmp ult <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv4i32 = icmp sge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf16 = fcmp oge half undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %cf16 = fcmp oge half undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cf32 = fcmp ogt float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cf64 = fcmp ogt double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %cbf64 = fcmp ogt bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cfv816 = fcmp olt <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cfv432 = fcmp oge <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cfv264 = fcmp oge <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c8 = icmp slt i8 undef, undef
@@ -50,17 +50,17 @@ define void @andcmp() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %a16 = and i16 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c16 = icmp ne i16 %a16, 0
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %a32 = and i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32 = icmp eq i32 %a32, 0
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c32 = icmp eq i32 %a32, 0
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %a64 = and i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64 = icmp ne i64 %a64, 0
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32ge = icmp sge i32 %a32, 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32le = icmp slt i32 %a32, 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32leneg = icmp sle i32 %a32, -1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32gtneg = icmp sgt i32 %a32, -1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64ge = icmp sge i64 %a64, 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64le = icmp slt i64 %a64, 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64 = icmp ne i64 %a64, 0
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c32ge = icmp sge i32 %a32, 1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c32le = icmp slt i32 %a32, 1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c32leneg = icmp sle i32 %a32, -1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c32gtneg = icmp sgt i32 %a32, -1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64ge = icmp sge i64 %a64, 1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64le = icmp slt i64 %a64, 1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64leneg = icmp sle i64 %a64, -1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64gtneg = icmp sgt i64 %a64, -1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp eq i128 %a128, 0
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef
@@ -70,7 +70,7 @@ define void @andcmp() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %av4i32 = and <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv4i32 = icmp ne <4 x i32> %av4i32, zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c32not0 = icmp eq i32 %a32, 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64sle = icmp sle i64 %a64, 0
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %c64sle = icmp sle i64 %a64, 0
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a8 = and i8 undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll
new file mode 100644
index 0000000000000..0bf195f435fe4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll
@@ -0,0 +1,1076 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+define void @fcmp_oeq(i32 %arg) {
+; CHECK-LABEL: 'fcmp_oeq'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp oeq float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp oeq <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp oeq <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp oeq double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp oeq <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp oeq fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp oeq <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp oeq float undef, undef
+  %v2f32 = fcmp oeq <2 x float> undef, undef
+  %v4f32 = fcmp oeq <4 x float> undef, undef
+  %v8f32 = fcmp oeq <8 x float> undef, undef
+  %f64 = fcmp oeq double undef, undef
+  %v2f64 = fcmp oeq <2 x double> undef, undef
+  %v4f64 = fcmp oeq <4 x double> undef, undef
+  %f128 = fcmp oeq fp128 undef, undef
+  %v2f128 = fcmp oeq <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_oeq_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_oeq_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp oeq half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp oeq <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp oeq <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oeq <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_oeq_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp oeq half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp oeq <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp oeq <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp oeq <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp oeq half undef, undef
+  %v2f16 = fcmp oeq <2 x half> undef, undef
+  %v4f16 = fcmp oeq <4 x half> undef, undef
+  %v8f16 = fcmp oeq <8 x half> undef, undef
+  %v16f16 = fcmp oeq <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_oeq_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_oeq_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp oeq bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp oeq bfloat undef, undef
+  %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
+  %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
+  %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+  %v16bf16 = fcmp oeq <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ogt(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ogt'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ogt float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp ogt <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp ogt <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ogt double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp ogt <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ogt fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ogt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ogt float undef, undef
+  %v2f32 = fcmp ogt <2 x float> undef, undef
+  %v4f32 = fcmp ogt <4 x float> undef, undef
+  %v8f32 = fcmp ogt <8 x float> undef, undef
+  %f64 = fcmp ogt double undef, undef
+  %v2f64 = fcmp ogt <2 x double> undef, undef
+  %v4f64 = fcmp ogt <4 x double> undef, undef
+  %f128 = fcmp ogt fp128 undef, undef
+  %v2f128 = fcmp ogt <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ogt_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ogt_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ogt half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ogt <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ogt <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ogt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ogt_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ogt half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp ogt <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp ogt <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp ogt <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ogt half undef, undef
+  %v2f16 = fcmp ogt <2 x half> undef, undef
+  %v4f16 = fcmp ogt <4 x half> undef, undef
+  %v8f16 = fcmp ogt <8 x half> undef, undef
+  %v16f16 = fcmp ogt <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ogt_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ogt_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ogt bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ogt bfloat undef, undef
+  %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ogt <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_oge(i32 %arg) {
+; CHECK-LABEL: 'fcmp_oge'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp oge float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp oge <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp oge <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp oge double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp oge <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp oge fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp oge <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp oge float undef, undef
+  %v2f32 = fcmp oge <2 x float> undef, undef
+  %v4f32 = fcmp oge <4 x float> undef, undef
+  %v8f32 = fcmp oge <8 x float> undef, undef
+  %f64 = fcmp oge double undef, undef
+  %v2f64 = fcmp oge <2 x double> undef, undef
+  %v4f64 = fcmp oge <4 x double> undef, undef
+  %f128 = fcmp oge fp128 undef, undef
+  %v2f128 = fcmp oge <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_oge_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_oge_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp oge half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp oge <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp oge <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oge <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_oge_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp oge half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp oge <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp oge <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp oge <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp oge half undef, undef
+  %v2f16 = fcmp oge <2 x half> undef, undef
+  %v4f16 = fcmp oge <4 x half> undef, undef
+  %v8f16 = fcmp oge <8 x half> undef, undef
+  %v16f16 = fcmp oge <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_oge_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_oge_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp oge bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp oge bfloat undef, undef
+  %v2bf16 = fcmp oge <2 x bfloat> undef, undef
+  %v4bf16 = fcmp oge <4 x bfloat> undef, undef
+  %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+  %v16bf16 = fcmp oge <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_olt(i32 %arg) {
+; CHECK-LABEL: 'fcmp_olt'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp olt float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp olt <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp olt <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp olt double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp olt <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp olt fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp olt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp olt float undef, undef
+  %v2f32 = fcmp olt <2 x float> undef, undef
+  %v4f32 = fcmp olt <4 x float> undef, undef
+  %v8f32 = fcmp olt <8 x float> undef, undef
+  %f64 = fcmp olt double undef, undef
+  %v2f64 = fcmp olt <2 x double> undef, undef
+  %v4f64 = fcmp olt <4 x double> undef, undef
+  %f128 = fcmp olt fp128 undef, undef
+  %v2f128 = fcmp olt <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_olt_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_olt_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp olt half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_olt_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp olt half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp olt half undef, undef
+  %v2f16 = fcmp olt <2 x half> undef, undef
+  %v4f16 = fcmp olt <4 x half> undef, undef
+  %v8f16 = fcmp olt <8 x half> undef, undef
+  %v16f16 = fcmp olt <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_olt_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_olt_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp olt bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp olt bfloat undef, undef
+  %v2bf16 = fcmp olt <2 x bfloat> undef, undef
+  %v4bf16 = fcmp olt <4 x bfloat> undef, undef
+  %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+  %v16bf16 = fcmp olt <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ole(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ole'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ole float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp ole <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp ole <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ole double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp ole <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ole fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ole <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ole float undef, undef
+  %v2f32 = fcmp ole <2 x float> undef, undef
+  %v4f32 = fcmp ole <4 x float> undef, undef
+  %v8f32 = fcmp ole <8 x float> undef, undef
+  %f64 = fcmp ole double undef, undef
+  %v2f64 = fcmp ole <2 x double> undef, undef
+  %v4f64 = fcmp ole <4 x double> undef, undef
+  %f128 = fcmp ole fp128 undef, undef
+  %v2f128 = fcmp ole <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ole_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ole_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ole half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ole <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ole <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ole <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ole_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ole half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp ole <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp ole <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp ole <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ole half undef, undef
+  %v2f16 = fcmp ole <2 x half> undef, undef
+  %v4f16 = fcmp ole <4 x half> undef, undef
+  %v8f16 = fcmp ole <8 x half> undef, undef
+  %v16f16 = fcmp ole <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ole_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ole_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ole bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ole bfloat undef, undef
+  %v2bf16 = fcmp ole <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ole <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ole <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_one(i32 %arg) {
+; CHECK-LABEL: 'fcmp_one'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f32 = fcmp one float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f32 = fcmp one <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f32 = fcmp one <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v8f32 = fcmp one <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f64 = fcmp one double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f64 = fcmp one <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v4f64 = fcmp one <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp one fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp one <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp one float undef, undef
+  %v2f32 = fcmp one <2 x float> undef, undef
+  %v4f32 = fcmp one <4 x float> undef, undef
+  %v8f32 = fcmp one <8 x float> undef, undef
+  %f64 = fcmp one double undef, undef
+  %v2f64 = fcmp one <2 x double> undef, undef
+  %v4f64 = fcmp one <4 x double> undef, undef
+  %f128 = fcmp one fp128 undef, undef
+  %v2f128 = fcmp one <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_one_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_one_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %f16 = fcmp one half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp one <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp one <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp one <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_one_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f16 = fcmp one half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f16 = fcmp one <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f16 = fcmp one <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v8f16 = fcmp one <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 6 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp one half undef, undef
+  %v2f16 = fcmp one <2 x half> undef, undef
+  %v4f16 = fcmp one <4 x half> undef, undef
+  %v8f16 = fcmp one <8 x half> undef, undef
+  %v16f16 = fcmp one <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_one_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_one_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %bf16 = fcmp one bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp one bfloat undef, undef
+  %v2bf16 = fcmp one <2 x bfloat> undef, undef
+  %v4bf16 = fcmp one <4 x bfloat> undef, undef
+  %v8bf16 = fcmp one <8 x bfloat> undef, undef
+  %v16bf16 = fcmp one <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ord(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ord'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ord float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f32 = fcmp ord <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f32 = fcmp ord <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v8f32 = fcmp ord <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ord double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f64 = fcmp ord <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v4f64 = fcmp ord <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ord fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ord <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ord float undef, undef
+  %v2f32 = fcmp ord <2 x float> undef, undef
+  %v4f32 = fcmp ord <4 x float> undef, undef
+  %v8f32 = fcmp ord <8 x float> undef, undef
+  %f64 = fcmp ord double undef, undef
+  %v2f64 = fcmp ord <2 x double> undef, undef
+  %v4f64 = fcmp ord <4 x double> undef, undef
+  %f128 = fcmp ord fp128 undef, undef
+  %v2f128 = fcmp ord <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ord_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ord_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ord half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp ord <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp ord <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp ord <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ord_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ord half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f16 = fcmp ord <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f16 = fcmp ord <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v8f16 = fcmp ord <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 6 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ord half undef, undef
+  %v2f16 = fcmp ord <2 x half> undef, undef
+  %v4f16 = fcmp ord <4 x half> undef, undef
+  %v8f16 = fcmp ord <8 x half> undef, undef
+  %v16f16 = fcmp ord <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ord_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ord_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ord bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ord bfloat undef, undef
+  %v2bf16 = fcmp ord <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ord <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ord <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ueq(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ueq'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f32 = fcmp ueq float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f32 = fcmp ueq <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f32 = fcmp ueq <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v8f32 = fcmp ueq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f64 = fcmp ueq double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f64 = fcmp ueq <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %v4f64 = fcmp ueq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ueq fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ueq <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ueq float undef, undef
+  %v2f32 = fcmp ueq <2 x float> undef, undef
+  %v4f32 = fcmp ueq <4 x float> undef, undef
+  %v8f32 = fcmp ueq <8 x float> undef, undef
+  %f64 = fcmp ueq double undef, undef
+  %v2f64 = fcmp ueq <2 x double> undef, undef
+  %v4f64 = fcmp ueq <4 x double> undef, undef
+  %f128 = fcmp ueq fp128 undef, undef
+  %v2f128 = fcmp ueq <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ueq_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ueq_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %f16 = fcmp ueq half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp ueq <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp ueq <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp ueq <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ueq_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %f16 = fcmp ueq half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v2f16 = fcmp ueq <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v4f16 = fcmp ueq <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %v8f16 = fcmp ueq <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 6 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ueq half undef, undef
+  %v2f16 = fcmp ueq <2 x half> undef, undef
+  %v4f16 = fcmp ueq <4 x half> undef, undef
+  %v8f16 = fcmp ueq <8 x half> undef, undef
+  %v16f16 = fcmp ueq <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ueq_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ueq_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %bf16 = fcmp ueq bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ueq bfloat undef, undef
+  %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ueq <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ugt(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ugt'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ugt float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp ugt <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp ugt <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ugt double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp ugt <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ugt fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ugt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ugt float undef, undef
+  %v2f32 = fcmp ugt <2 x float> undef, undef
+  %v4f32 = fcmp ugt <4 x float> undef, undef
+  %v8f32 = fcmp ugt <8 x float> undef, undef
+  %f64 = fcmp ugt double undef, undef
+  %v2f64 = fcmp ugt <2 x double> undef, undef
+  %v4f64 = fcmp ugt <4 x double> undef, undef
+  %f128 = fcmp ugt fp128 undef, undef
+  %v2f128 = fcmp ugt <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ugt_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ugt_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ugt half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ugt <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ugt <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ugt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ugt_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ugt half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp ugt <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp ugt <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp ugt <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ugt half undef, undef
+  %v2f16 = fcmp ugt <2 x half> undef, undef
+  %v4f16 = fcmp ugt <4 x half> undef, undef
+  %v8f16 = fcmp ugt <8 x half> undef, undef
+  %v16f16 = fcmp ugt <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ugt_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ugt_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ugt bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ugt bfloat undef, undef
+  %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ugt <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_uge(i32 %arg) {
+; CHECK-LABEL: 'fcmp_uge'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp uge float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp uge <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp uge <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp uge double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp uge <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp uge fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp uge <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp uge float undef, undef
+  %v2f32 = fcmp uge <2 x float> undef, undef
+  %v4f32 = fcmp uge <4 x float> undef, undef
+  %v8f32 = fcmp uge <8 x float> undef, undef
+  %f64 = fcmp uge double undef, undef
+  %v2f64 = fcmp uge <2 x double> undef, undef
+  %v4f64 = fcmp uge <4 x double> undef, undef
+  %f128 = fcmp uge fp128 undef, undef
+  %v2f128 = fcmp uge <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_uge_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_uge_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp uge half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp uge <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp uge <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uge <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_uge_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp uge half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp uge <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp uge <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp uge <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp uge half undef, undef
+  %v2f16 = fcmp uge <2 x half> undef, undef
+  %v4f16 = fcmp uge <4 x half> undef, undef
+  %v8f16 = fcmp uge <8 x half> undef, undef
+  %v16f16 = fcmp uge <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_uge_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_uge_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp uge bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp uge bfloat undef, undef
+  %v2bf16 = fcmp uge <2 x bfloat> undef, undef
+  %v4bf16 = fcmp uge <4 x bfloat> undef, undef
+  %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+  %v16bf16 = fcmp uge <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ult(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ult'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ult float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp ult <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp ult <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ult double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp ult <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ult fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ult <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ult float undef, undef
+  %v2f32 = fcmp ult <2 x float> undef, undef
+  %v4f32 = fcmp ult <4 x float> undef, undef
+  %v8f32 = fcmp ult <8 x float> undef, undef
+  %f64 = fcmp ult double undef, undef
+  %v2f64 = fcmp ult <2 x double> undef, undef
+  %v4f64 = fcmp ult <4 x double> undef, undef
+  %f128 = fcmp ult fp128 undef, undef
+  %v2f128 = fcmp ult <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ult_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ult_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ult half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ult <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ult <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ult <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ult_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ult half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp ult <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp ult <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp ult <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ult half undef, undef
+  %v2f16 = fcmp ult <2 x half> undef, undef
+  %v4f16 = fcmp ult <4 x half> undef, undef
+  %v8f16 = fcmp ult <8 x half> undef, undef
+  %v16f16 = fcmp ult <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ult_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ult_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ult bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ult bfloat undef, undef
+  %v2bf16 = fcmp ult <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ult <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ult <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_ule(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ule'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp ule float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp ule <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp ule <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp ule double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp ule <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp ule fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp ule <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp ule float undef, undef
+  %v2f32 = fcmp ule <2 x float> undef, undef
+  %v4f32 = fcmp ule <4 x float> undef, undef
+  %v8f32 = fcmp ule <8 x float> undef, undef
+  %f64 = fcmp ule double undef, undef
+  %v2f64 = fcmp ule <2 x double> undef, undef
+  %v4f64 = fcmp ule <4 x double> undef, undef
+  %f128 = fcmp ule fp128 undef, undef
+  %v2f128 = fcmp ule <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_ule_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_ule_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ule half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ule <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ule <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ule <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_ule_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp ule half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp ule <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp ule <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp ule <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp ule half undef, undef
+  %v2f16 = fcmp ule <2 x half> undef, undef
+  %v4f16 = fcmp ule <4 x half> undef, undef
+  %v8f16 = fcmp ule <8 x half> undef, undef
+  %v16f16 = fcmp ule <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_ule_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_ule_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ule bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp ule bfloat undef, undef
+  %v2bf16 = fcmp ule <2 x bfloat> undef, undef
+  %v4bf16 = fcmp ule <4 x bfloat> undef, undef
+  %v8bf16 = fcmp ule <8 x bfloat> undef, undef
+  %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_une(i32 %arg) {
+; CHECK-LABEL: 'fcmp_une'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp une float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp une <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp une <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp une double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp une <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp une fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp une <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp une float undef, undef
+  %v2f32 = fcmp une <2 x float> undef, undef
+  %v4f32 = fcmp une <4 x float> undef, undef
+  %v8f32 = fcmp une <8 x float> undef, undef
+  %f64 = fcmp une double undef, undef
+  %v2f64 = fcmp une <2 x double> undef, undef
+  %v4f64 = fcmp une <4 x double> undef, undef
+  %f128 = fcmp une fp128 undef, undef
+  %v2f128 = fcmp une <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_une_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_une_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp une half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp une <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp une <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp une <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_une_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp une half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp une <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp une <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp une <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp une half undef, undef
+  %v2f16 = fcmp une <2 x half> undef, undef
+  %v4f16 = fcmp une <4 x half> undef, undef
+  %v8f16 = fcmp une <8 x half> undef, undef
+  %v16f16 = fcmp une <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_une_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_une_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp une bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp une bfloat undef, undef
+  %v2bf16 = fcmp une <2 x bfloat> undef, undef
+  %v4bf16 = fcmp une <4 x bfloat> undef, undef
+  %v8bf16 = fcmp une <8 x bfloat> undef, undef
+  %v16bf16 = fcmp une <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_uno(i32 %arg) {
+; CHECK-LABEL: 'fcmp_uno'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp uno float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp uno <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp uno <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp uno double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp uno <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp uno fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp uno <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp uno float undef, undef
+  %v2f32 = fcmp uno <2 x float> undef, undef
+  %v4f32 = fcmp uno <4 x float> undef, undef
+  %v8f32 = fcmp uno <8 x float> undef, undef
+  %f64 = fcmp uno double undef, undef
+  %v2f64 = fcmp uno <2 x double> undef, undef
+  %v4f64 = fcmp uno <4 x double> undef, undef
+  %f128 = fcmp uno fp128 undef, undef
+  %v2f128 = fcmp uno <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_uno_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_uno_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp uno half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp uno <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp uno <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uno <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_uno_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp uno half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp uno <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp uno <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp uno <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp uno half undef, undef
+  %v2f16 = fcmp uno <2 x half> undef, undef
+  %v4f16 = fcmp uno <4 x half> undef, undef
+  %v8f16 = fcmp uno <8 x half> undef, undef
+  %v16f16 = fcmp uno <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_uno_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_uno_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp uno bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp uno bfloat undef, undef
+  %v2bf16 = fcmp uno <2 x bfloat> undef, undef
+  %v4bf16 = fcmp uno <4 x bfloat> undef, undef
+  %v8bf16 = fcmp uno <8 x bfloat> undef, undef
+  %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_true(i32 %arg) {
+; CHECK-LABEL: 'fcmp_true'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp true float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp true <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp true <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp true double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp true <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp true fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp true <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp true float undef, undef
+  %v2f32 = fcmp true <2 x float> undef, undef
+  %v4f32 = fcmp true <4 x float> undef, undef
+  %v8f32 = fcmp true <8 x float> undef, undef
+  %f64 = fcmp true double undef, undef
+  %v2f64 = fcmp true <2 x double> undef, undef
+  %v4f64 = fcmp true <4 x double> undef, undef
+  %f128 = fcmp true fp128 undef, undef
+  %v2f128 = fcmp true <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_true_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_true_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp true half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp true <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp true <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp true <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_true_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp true half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp true <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp true <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp true <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp true half undef, undef
+  %v2f16 = fcmp true <2 x half> undef, undef
+  %v4f16 = fcmp true <4 x half> undef, undef
+  %v8f16 = fcmp true <8 x half> undef, undef
+  %v16f16 = fcmp true <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_true_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_true_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp true bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp true bfloat undef, undef
+  %v2bf16 = fcmp true <2 x bfloat> undef, undef
+  %v4bf16 = fcmp true <4 x bfloat> undef, undef
+  %v8bf16 = fcmp true <8 x bfloat> undef, undef
+  %v16bf16 = fcmp true <16 x bfloat> undef, undef
+  ret void
+}
+
+define void @fcmp_false(i32 %arg) {
+; CHECK-LABEL: 'fcmp_false'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f32 = fcmp false float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f32 = fcmp false <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f32 = fcmp false <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f64 = fcmp false double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f64 = fcmp false <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %f128 = fcmp false fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %v2f128 = fcmp false <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f32 = fcmp false float undef, undef
+  %v2f32 = fcmp false <2 x float> undef, undef
+  %v4f32 = fcmp false <4 x float> undef, undef
+  %v8f32 = fcmp false <8 x float> undef, undef
+  %f64 = fcmp false double undef, undef
+  %v2f64 = fcmp false <2 x double> undef, undef
+  %v4f64 = fcmp false <4 x double> undef, undef
+  %f128 = fcmp false fp128 undef, undef
+  %v2f128 = fcmp false <2 x fp128> undef, undef
+  ret void
+}
+
+define void @fcmp_false_half(i32 %arg) {
+; CHECK-BASE-LABEL: 'fcmp_false_half'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp false half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp false <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp false <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp false <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fcmp_false_half'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %f16 = fcmp false half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v2f16 = fcmp false <2 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v4f16 = fcmp false <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %v8f16 = fcmp false <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %f16 = fcmp false half undef, undef
+  %v2f16 = fcmp false <2 x half> undef, undef
+  %v4f16 = fcmp false <4 x half> undef, undef
+  %v8f16 = fcmp false <8 x half> undef, undef
+  %v16f16 = fcmp false <16 x half> undef, undef
+  ret void
+}
+
+define void @fcmp_false_bfloat(i32 %arg) {
+; CHECK-LABEL: 'fcmp_false_bfloat'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp false bfloat undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %bf16 = fcmp false bfloat undef, undef
+  %v2bf16 = fcmp false <2 x bfloat> undef, undef
+  %v4bf16 = fcmp false <4 x bfloat> undef, undef
+  %v8bf16 = fcmp false <8 x bfloat> undef, undef
+  %v16bf16 = fcmp false <16 x bfloat> undef, undef
+  ret void
+}
+
diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll
index 76bd5c9bb0af6..c667cf2d5bbb2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/select.ll
@@ -11,17 +11,17 @@ define void @select() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4 = select i1 undef, i64 undef, i64 undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v5 = select i1 undef, float undef, float undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v6 = select i1 undef, double undef, double undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:1 Lat:1 SizeLat:1 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:1 Lat:1 SizeLat:1 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:1 Lat:1 SizeLat:1 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v1 = select i1 undef, i8 undef, i8 undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
index 9a0e5c3eb7964..6bb1a2e9d2b19 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
@@ -58,10 +58,10 @@ define <vscale x 32 x i1> @cmp_nxv32i1() {
 ; Check fcmp for legal FP vectors
 define void @cmp_legal_fp() #0 {
 ; CHECK-LABEL: 'cmp_legal_fp'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %1 = fcmp oge <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %2 = fcmp oge <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %3 = fcmp oge <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %1 = fcmp oge <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %2 = fcmp oge <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %3 = fcmp oge <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:5 Lat:5 SizeLat:5 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fcmp oge <vscale x 2 x double> undef, undef
@@ -74,7 +74,7 @@ define void @cmp_legal_fp() #0 {
 ; Check fcmp for an illegal FP vector
 define <vscale x 16 x i1> @cmp_nxv16f16() {
 ; CHECK-LABEL: 'cmp_nxv16f16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = fcmp oge <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %res = fcmp oge <vscale x 16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 16 x i1> %res
 ;
   %res = fcmp oge <vscale x 16 x half> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index d568f4b2c0b5c..3d2fd30954908 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -84,7 +84,7 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
 ; COST-LABEL: 'v3i64_select_sle'
 ; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = icmp sle <3 x i64> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
+; COST-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i64> %s.1
 ;
   %cmp.1 = icmp sle <3 x i64> %a, %b
@@ -94,7 +94,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
 
 define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond) {
 ; COST-LABEL: 'v2i64_select_no_cmp'
-; COST-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %s.1
 ;
   %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
@@ -103,12 +103,12 @@ define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond
 
 define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_ogt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp ogt <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_ogt'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ogt <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -119,12 +119,12 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ogt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_ogt'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -135,7 +135,7 @@ define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_ogt(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ogt <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -146,7 +146,7 @@ define <2 x float> @v2f32_select_ogt(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_ogt(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ogt <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -157,7 +157,7 @@ define <4 x float> @v4f32_select_ogt(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ogt <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -168,7 +168,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -179,7 +179,7 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -190,12 +190,12 @@ define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_oge'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp oge <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_oge'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oge <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -206,12 +206,12 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_oge'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_oge'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -222,7 +222,7 @@ define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_oge(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_oge'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oge <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -233,7 +233,7 @@ define <2 x float> @v2f32_select_oge(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_oge(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_oge'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oge <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -244,7 +244,7 @@ define <4 x float> @v4f32_select_oge(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_oge'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oge <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -255,7 +255,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_oge'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -266,7 +266,7 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_oge'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -277,12 +277,12 @@ define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_olt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp olt <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_olt'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp olt <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -293,12 +293,12 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_olt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_olt'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -309,7 +309,7 @@ define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_olt(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_olt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp olt <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -320,7 +320,7 @@ define <2 x float> @v2f32_select_olt(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_olt(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_olt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp olt <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -331,7 +331,7 @@ define <4 x float> @v4f32_select_olt(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_olt'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp olt <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -342,7 +342,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_olt'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -353,7 +353,7 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_olt'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -364,12 +364,12 @@ define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_ole'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp ole <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_ole'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ole <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -380,12 +380,12 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ole'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_ole'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -396,7 +396,7 @@ define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_ole(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_ole'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ole <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -407,7 +407,7 @@ define <2 x float> @v2f32_select_ole(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_ole(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_ole'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ole <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -418,7 +418,7 @@ define <4 x float> @v4f32_select_ole(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_ole'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp ole <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -429,7 +429,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ole'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -440,7 +440,7 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ole'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -451,12 +451,12 @@ define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_oeq'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp oeq <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_oeq'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oeq <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -467,12 +467,12 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_oeq'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_oeq'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -483,7 +483,7 @@ define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_oeq(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oeq <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -494,7 +494,7 @@ define <2 x float> @v2f32_select_oeq(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_oeq(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oeq <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -505,7 +505,7 @@ define <4 x float> @v4f32_select_oeq(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp oeq <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -516,7 +516,7 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -527,7 +527,7 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -538,13 +538,13 @@ define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_one'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %cmp.1 = fcmp one <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_one'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp one <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
   %cmp.1 = fcmp one <4 x half> %a, %b
@@ -554,13 +554,13 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_one'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp one <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_one'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp one <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
   %cmp.1 = fcmp one <8 x half> %a, %b
@@ -570,8 +570,8 @@ define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_one'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <2 x float> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp one <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
   %cmp.1 = fcmp one <2 x float> %a, %b
@@ -581,8 +581,8 @@ define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_one'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <4 x float> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp one <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
   %cmp.1 = fcmp one <4 x float> %a, %b
@@ -592,8 +592,8 @@ define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_one'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <2 x double> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp one <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
   %cmp.1 = fcmp one <2 x double> %a, %b
@@ -603,7 +603,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_one'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -614,7 +614,7 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_one'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -625,12 +625,12 @@ define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_une'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp une <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_une'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp une <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
@@ -641,12 +641,12 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_une'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_une'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -657,7 +657,7 @@ define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_une(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_une'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp une <2 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
@@ -668,7 +668,7 @@ define <2 x float> @v2f32_select_une(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_une(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_une'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp une <4 x float> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
@@ -679,7 +679,7 @@ define <4 x float> @v4f32_select_une(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_une'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cmp.1 = fcmp une <2 x double> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
@@ -690,7 +690,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_une'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -701,7 +701,7 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_une'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
@@ -712,13 +712,13 @@ define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bf
 
 define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-NOFP16-LABEL: 'v4f16_select_ord'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %cmp.1 = fcmp ord <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v4f16_select_ord'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <4 x half> %a, %b
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp ord <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %s.1
 ;
   %cmp.1 = fcmp ord <4 x half> %a, %b
@@ -728,13 +728,13 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ord'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp ord <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
 ; COST-FULLFP16-LABEL: 'v8f16_select_ord'
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
-; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp ord <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
   %cmp.1 = fcmp ord <8 x half> %a, %b
@@ -744,8 +744,8 @@ define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 
 define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: 'v2f32_select_ord'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <2 x float> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp ord <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %s.1
 ;
   %cmp.1 = fcmp ord <2 x float> %a, %b
@@ -755,8 +755,8 @@ define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float>
 
 define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: 'v4f32_select_ord'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <4 x float> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp ord <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %s.1
 ;
   %cmp.1 = fcmp ord <4 x float> %a, %b
@@ -766,8 +766,8 @@ define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float>
 
 define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: 'v2f64_select_ord'
-; COST-NEXT:  Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <2 x double> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:6 SizeLat:3 for: %cmp.1 = fcmp ord <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 1 for: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %s.1
 ;
   %cmp.1 = fcmp ord <2 x double> %a, %b
@@ -777,7 +777,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x dou
 
 define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ord'
-; COST-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
@@ -788,7 +788,7 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ord'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 6029095bbe7b1..58376f78ee709 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -917,32 +917,23 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFNONE-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
 ; TFNONE-NEXT:  [[ENTRY:.*]]:
 ; TFNONE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
-; TFNONE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
-; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; TFNONE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
 ; TFNONE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TFNONE:       [[VECTOR_PH]]:
-; TFNONE-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFNONE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; TFNONE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
 ; TFNONE-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; TFNONE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; TFNONE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; TFNONE:       [[VECTOR_BODY]]:
 ; TFNONE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TFNONE-NEXT:    [[TMP7:%.*]] = load double, ptr [[P2]], align 8
-; TFNONE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[TMP7]], i64 0
-; TFNONE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
-; TFNONE-NEXT:    [[TMP8:%.*]] = call <vscale x 2 x double> @exp_masked_scalable(<vscale x 2 x double> [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true))
-; TFNONE-NEXT:    [[TMP9:%.*]] = fcmp ogt <vscale x 2 x double> [[TMP8]], zeroinitializer
-; TFNONE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> splat (double 1.000000e+00)
-; TFNONE-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
-; TFNONE-NEXT:    [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2
-; TFNONE-NEXT:    [[TMP13:%.*]] = sub i32 [[TMP12]], 1
-; TFNONE-NEXT:    [[TMP14:%.*]] = extractelement <vscale x 2 x double> [[PREDPHI]], i32 [[TMP13]]
+; TFNONE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i64 0
+; TFNONE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFNONE-NEXT:    [[TMP2:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFNONE-NEXT:    [[TMP3:%.*]] = fcmp ogt <2 x double> [[TMP2]], zeroinitializer
+; TFNONE-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
+; TFNONE-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
 ; TFNONE-NEXT:    store double [[TMP14]], ptr [[P]], align 8
-; TFNONE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; TFNONE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; TFNONE-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; TFNONE-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; TFNONE:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
index 0a6b86c953ee0..2370505610f6c 100644
--- a/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
+++ b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
@@ -9,10 +9,6 @@ define void @test() {
 ;
 ; AARCH64-LABEL: @test(
 ; AARCH64-NEXT:  entry:
-; AARCH64-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
-; AARCH64-NEXT:    [[TOBOOL:%.*]] = fcmp une half [[TMP0]], 0xH0000
-; AARCH64-NEXT:    [[TMP1:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
-; AARCH64-NEXT:    [[TOBOOL3:%.*]] = fcmp une half [[TMP1]], 0xH0000
 ; AARCH64-NEXT:    ret void
 ;
 entry:



More information about the llvm-commits mailing list