[llvm] cb353dc - [LV] Add cost model for simd vector select instructions of type float

Mon Jun 19 22:14:22 PDT 2023

Author: Zhongyunde
Date: 2023-06-20T13:12:19+08:00
New Revision: cb353dc74e72b084d54d10515fbc289cc19a2023

URL: https://github.com/llvm/llvm-project/commit/cb353dc74e72b084d54d10515fbc289cc19a2023
DIFF: https://github.com/llvm/llvm-project/commit/cb353dc74e72b084d54d10515fbc289cc19a2023.diff

LOG: [LV] Add cost model for simd vector select instructions of type float

For simd vector selects, use cmeq + bsl for v2f32/v4f32/v2f64, so their cost are cheep.
Fix https://github.com/llvm/llvm-project/issues/63082

Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D152523

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
    llvm/test/Analysis/CostModel/AArch64/select.ll
    llvm/test/Analysis/CostModel/AArch64/vector-select.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8a00325fff869..bcadd65fc781e 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2694,6 +2694,11 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 
     static const TypeConversionCostTblEntry
     VectorSelectTbl[] = {
+      { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
+      { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
+      { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
+      { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
+      { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
       { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
       { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
       { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index c10f87fcef658..afa1d5bdd3161 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -163,9 +163,9 @@ define void @reduce_smax() {
 define void @reduce_fmin16() {
 ; CHECK-NOF16-LABEL: 'reduce_fmin16'
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
@@ -197,9 +197,9 @@ define void @reduce_fmin16() {
 define void @reduce_fmax16() {
 ; CHECK-NOF16-LABEL: 'reduce_fmax16'
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll
index d493ebb349ba0..a69d02bb562ef 100644
--- a/llvm/test/Analysis/CostModel/AArch64/select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/select.ll
@@ -18,6 +18,11 @@ define void @select() {
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SIZE-LABEL: 'select'
@@ -33,6 +38,11 @@ define void @select() {
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v1 = select i1 undef, i8 undef, i8 undef
@@ -53,5 +63,11 @@ define void @select() {
   %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
   %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
 
+  ; simd vector float
+  %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+  %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+  %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+  %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index 7d36ec2281978..40062605c47c2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -158,7 +158,7 @@ define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond
 define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_ogt
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp ogt <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ogt <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -176,7 +176,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_ogt
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp ogt <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ogt <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -242,7 +242,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_oge
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp oge <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oge <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -260,7 +260,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_oge
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp oge <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oge <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -326,7 +326,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_olt
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp olt <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp olt <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -344,7 +344,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_olt
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp olt <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp olt <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -410,7 +410,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_ole
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp ole <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ole <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -428,7 +428,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_ole
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp ole <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ole <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -494,7 +494,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_oeq
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp oeq <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oeq <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -512,7 +512,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_oeq
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp oeq <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oeq <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -578,9 +578,9 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_one
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
 ; CODE-LABEL: v4f16_select_one
 ; CODE:       bb.0
@@ -598,9 +598,9 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_one
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:  %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
 ; CODE-LABEL: v8f16_select_one
 ; CODE:       bb.0
@@ -618,7 +618,7 @@ define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c)
 define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: v2f32_select_one
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <2 x float> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 
 ; CODE-LABEL: v2f32_select_one
 ; CODE:       bb.0
@@ -636,7 +636,7 @@ define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float>
 define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: v4f32_select_one
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <4 x float> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 
 ; CODE-LABEL: v4f32_select_one
 ; CODE:       bb.0
@@ -654,7 +654,7 @@ define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float>
 define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: v2f64_select_one
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <2 x double> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ;
 ; CODE-LABEL: v2f64_select_one
 ; CODE:       bb.0
@@ -672,7 +672,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; COST-LABEL: v4f16_select_une
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %cmp.1 = fcmp une <4 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <4 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
 ;
@@ -690,7 +690,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-LABEL: v8f16_select_une
 ; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp une <8 x half> %a, %b
-; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <8 x half> %a, %b
 ; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ;
@@ -756,7 +756,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou
 define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: v2f32_select_ord
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <2 x float> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
 ;
 ; CODE-LABEL: v2f32_select_ord
 ; CODE:       bb.0
@@ -774,7 +774,7 @@ define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float>
 define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; COST-LABEL: v4f32_select_ord
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <4 x float> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
 
 ; CODE-LABEL: v4f32_select_ord
 ; CODE:       bb.0
@@ -792,7 +792,7 @@ define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float>
 define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; COST-LABEL: v2f64_select_ord
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <2 x double> %a, %b
-; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
 ;
 ; CODE-LABEL: v2f64_select_ord
 ; CODE:       bb.0

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index c25d68c9b67ad..b98352524e185 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -133,37 +133,50 @@ for.end:                                          ; preds = %for.cond.cleanup
 define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-LABEL: @loop2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
-; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
-; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[B]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[C]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
+; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[C]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    [[BOUND04:%.*]] = icmp ugt ptr [[UGLYGEP3]], [[B]]
-; CHECK-NEXT:    [[BOUND15:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]]
+; CHECK-NEXT:    [[BOUND04:%.*]] = icmp ugt ptr [[SCEVGEP3]], [[B]]
+; CHECK-NEXT:    [[BOUND15:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT10]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !alias.scope !7
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD7]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD8]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd <4 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[TMP6]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD12]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD13]]
+; CHECK-NEXT:    [[PREDPHI14:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]]
+; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT:    store <4 x float> [[PREDPHI14]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       loop.body:
 ; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]