[llvm] cb353dc - [LV] Add cost model for simd vector select instructions of type float
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 19 22:14:22 PDT 2023
Author: Zhongyunde
Date: 2023-06-20T13:12:19+08:00
New Revision: cb353dc74e72b084d54d10515fbc289cc19a2023
URL: https://github.com/llvm/llvm-project/commit/cb353dc74e72b084d54d10515fbc289cc19a2023
DIFF: https://github.com/llvm/llvm-project/commit/cb353dc74e72b084d54d10515fbc289cc19a2023.diff
LOG: [LV] Add cost model for simd vector select instructions of type float
For simd vector selects, use cmeq + bsl for v2f32/v4f32/v2f64, so their cost are cheep.
Fix https://github.com/llvm/llvm-project/issues/63082
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D152523
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
llvm/test/Analysis/CostModel/AArch64/select.ll
llvm/test/Analysis/CostModel/AArch64/vector-select.ll
llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8a00325fff869..bcadd65fc781e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2694,6 +2694,11 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
+ { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index c10f87fcef658..afa1d5bdd3161 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -163,9 +163,9 @@ define void @reduce_smax() {
define void @reduce_fmin16() {
; CHECK-NOF16-LABEL: 'reduce_fmin16'
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
@@ -197,9 +197,9 @@ define void @reduce_fmin16() {
define void @reduce_fmax16() {
; CHECK-NOF16-LABEL: 'reduce_fmax16'
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll
index d493ebb349ba0..a69d02bb562ef 100644
--- a/llvm/test/Analysis/CostModel/AArch64/select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/select.ll
@@ -18,6 +18,11 @@ define void @select() {
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-SIZE-LABEL: 'select'
@@ -33,6 +38,11 @@ define void @select() {
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%v1 = select i1 undef, i8 undef, i8 undef
@@ -53,5 +63,11 @@ define void @select() {
%v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
%v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+ ; simd vector float
+ %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+ %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+ %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+ %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+ %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index 7d36ec2281978..40062605c47c2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -158,7 +158,7 @@ define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond
define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_ogt
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp ogt <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ogt <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -176,7 +176,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_ogt
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp ogt <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ogt <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -242,7 +242,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_oge
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp oge <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oge <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -260,7 +260,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_oge
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp oge <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oge <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -326,7 +326,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_olt
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp olt <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp olt <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -344,7 +344,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_olt
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp olt <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp olt <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -410,7 +410,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_ole
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp ole <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ole <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -428,7 +428,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_ole
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp ole <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ole <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -494,7 +494,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_oeq
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp oeq <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oeq <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -512,7 +512,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_oeq
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp oeq <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oeq <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -578,9 +578,9 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_one
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <4 x half> %a, %b
-; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
; CODE-LABEL: v4f16_select_one
; CODE: bb.0
@@ -598,9 +598,9 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_one
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <8 x half> %a, %b
-; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
; CODE-LABEL: v8f16_select_one
; CODE: bb.0
@@ -618,7 +618,7 @@ define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c)
define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; COST-LABEL: v2f32_select_one
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x float> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
; CODE-LABEL: v2f32_select_one
; CODE: bb.0
@@ -636,7 +636,7 @@ define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float>
define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; COST-LABEL: v4f32_select_one
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <4 x float> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
; CODE-LABEL: v4f32_select_one
; CODE: bb.0
@@ -654,7 +654,7 @@ define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float>
define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; COST-LABEL: v2f64_select_one
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x double> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
;
; CODE-LABEL: v2f64_select_one
; CODE: bb.0
@@ -672,7 +672,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; COST-LABEL: v4f16_select_une
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp une <4 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp une <4 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
;
@@ -690,7 +690,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; COST-LABEL: v8f16_select_une
; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp une <8 x half> %a, %b
-; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp une <8 x half> %a, %b
; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
;
@@ -756,7 +756,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou
define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; COST-LABEL: v2f32_select_ord
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x float> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
;
; CODE-LABEL: v2f32_select_ord
; CODE: bb.0
@@ -774,7 +774,7 @@ define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float>
define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; COST-LABEL: v4f32_select_ord
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <4 x float> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
; CODE-LABEL: v4f32_select_ord
; CODE: bb.0
@@ -792,7 +792,7 @@ define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float>
define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; COST-LABEL: v2f64_select_ord
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x double> %a, %b
-; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
;
; CODE-LABEL: v2f64_select_ord
; CODE: bb.0
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index c25d68c9b67ad..b98352524e185 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -133,37 +133,50 @@ for.end: ; preds = %for.cond.cleanup
define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
; CHECK-LABEL: @loop2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
-; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
-; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[B]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[C]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[C]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[UGLYGEP3]], [[B]]
-; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]]
+; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[SCEVGEP3]], [[B]]
+; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT10]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !alias.scope !7
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD7]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD8]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
+; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4
+; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD12]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD13]]
+; CHECK-NEXT: [[PREDPHI14:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]]
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: store <4 x float> [[PREDPHI14]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: loop.body:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]
More information about the llvm-commits
mailing list