[llvm] 4876f43 - Revert "[AArch64][TTI] Cost model FADD/FSUB/FNEG"
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 11 02:14:59 PDT 2023
Author: Sjoerd Meijer
Date: 2023-04-11T10:14:40+01:00
New Revision: 4876f43ea9b2742b2e826c687cdb62da45e40499
URL: https://github.com/llvm/llvm-project/commit/4876f43ea9b2742b2e826c687cdb62da45e40499
DIFF: https://github.com/llvm/llvm-project/commit/4876f43ea9b2742b2e826c687cdb62da45e40499.diff
LOG: Revert "[AArch64][TTI] Cost model FADD/FSUB/FNEG"
This reverts commit d0027e0be990df60f6f826123f035286a168f288.
Need to look at 2 test failures.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
llvm/test/Analysis/CostModel/AArch64/cast.ll
llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
llvm/test/Analysis/CostModel/AArch64/sve-math.ll
llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 22a598b9f63f..7c57d6ca1aec 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2408,18 +2408,11 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// We know that they are legal. See LowerAdd in ISelLowering.
return LT.first;
- case ISD::FNEG:
case ISD::FADD:
case ISD::FSUB:
- // Increase the cost for half and bfloat types if not architecturally
- // supported.
- if ((Ty->getScalarType()->isHalfTy() && !ST->hasFullFP16()) ||
- (Ty->getScalarType()->isBFloatTy() && !ST->hasBF16()))
- return 2 * LT.first;
- if (!Ty->getScalarType()->isFP128Ty())
- return LT.first;
case ISD::FMUL:
case ISD::FDIV:
+ case ISD::FNEG:
// These nodes are marked as 'custom' just to lower them to SVE.
// We know said lowering will incur no additional cost.
if (!Ty->getScalarType()->isFP128Ty())
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
index 18a1c31c03f7..d7bb0498fd07 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
@@ -5,14 +5,14 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @fadd() {
; CHECK-LABEL: 'fadd'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <vscale x 4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V4F16 = fadd <vscale x 4 x half> undef, undef
@@ -31,14 +31,14 @@ define void @fadd() {
define void @fsub() {
; CHECK-LABEL: 'fsub'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <vscale x 4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V4F16 = fsub <vscale x 4 x half> undef, undef
@@ -57,15 +57,15 @@ define void @fsub() {
define void @fneg() {
; CHECK-LABEL: 'fneg'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <vscale x 2 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <vscale x 4 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <vscale x 8 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <vscale x 16 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <vscale x 2 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <vscale x 4 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <vscale x 8 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <vscale x 2 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <vscale x 4 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <vscale x 2 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <vscale x 4 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <vscale x 8 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <vscale x 16 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <vscale x 2 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <vscale x 4 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <vscale x 8 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <vscale x 2 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <vscale x 4 x double> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2F16 = fneg <vscale x 2 x half> undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index dfbd2d8b39d0..5b7573b4ed84 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -5,17 +5,17 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define i32 @fadd(i32 %arg) {
; CHECK-LABEL: 'fadd'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fadd half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F16 = fadd half undef, undef
@@ -37,17 +37,17 @@ define i32 @fadd(i32 %arg) {
define i32 @fsub(i32 %arg) {
; CHECK-LABEL: 'fsub'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F16 = fsub half undef, undef
@@ -69,16 +69,16 @@ define i32 @fsub(i32 %arg) {
define i32 @fneg_idiom(i32 %arg) {
; CHECK-LABEL: 'fneg_idiom'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half 0xH8000, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F16 = fsub half -0.0, undef
@@ -99,18 +99,18 @@ define i32 @fneg_idiom(i32 %arg) {
define i32 @fneg(i32 %arg) {
; CHECK-LABEL: 'fneg'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F16 = fneg half undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
index 56138c23e946..96906c3d0496 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -38,7 +38,6 @@ define void @ext() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %z4i1i32 = zext <4 x i1> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
@@ -111,9 +110,6 @@ define void @ext() {
%s2i32i64 = sext <2 x i32> undef to <2 x i64>
%z2i32i64 = zext <2 x i32> undef to <2 x i64>
- %z4i1i32 = zext <4 x i1> undef to <4 x i32>
-
-
%s4i8i16 = sext <4 x i8> undef to <4 x i16>
%z4i8i16 = zext <4 x i8> undef to <4 x i16>
%s4i8i32 = sext <4 x i8> undef to <4 x i32>
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
index fee770de564b..f995bfd07370 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
-; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FP16
-; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+bf16 < %s | FileCheck %s --check-prefix=BF16
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+fullfp16 < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -9,35 +8,13 @@ define void @strict_fp_reductions() {
; CHECK-LABEL: 'strict_fp_reductions'
; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP16-LABEL: 'strict_fp_reductions'
-; FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; BF16-LABEL: 'strict_fp_reductions'
-; BF16-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
%fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
@@ -60,67 +37,21 @@ define void @fast_fp_reductions() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP16-LABEL: 'fast_fp_reductions'
-; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; BF16-LABEL: 'fast_fp_reductions'
-; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
%fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
index df40a962d0de..1c34e4fbc4e8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll
@@ -44,9 +44,9 @@ define void @add() #0 {
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i16 = add <32 x i16> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i32 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i64 = add <8 x i64> undef, undef
-; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f16 = fadd <32 x half> undef, undef
-; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f32 = fadd <16 x float> undef, undef
-; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f64 = fadd <8 x double> undef, undef
+; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f16 = fadd <32 x half> undef, undef
+; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f32 = fadd <16 x float> undef, undef
+; CHECK: cost of [[#mul(div(511,VBITS)+1,2)]] for instruction: %add512.f64 = fadd <8 x double> undef, undef
%add512.i8 = add <64 x i8> undef, undef
%add512.i16 = add <32 x i16> undef, undef
%add512.i32 = add <16 x i32> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 327e25054c25..96e67cd2ad96 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -52,7 +52,7 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
@@ -79,7 +79,7 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
@@ -117,15 +117,15 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
define void @strict_fp_reductions(<vscale x 4 x float> %v0, <vscale x 4 x double> %v1) {
; CHECK-LABEL: 'strict_fp_reductions'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'strict_fp_reductions'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll
index 3ddd246f77d3..d82b97958dd7 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll
@@ -10,7 +10,7 @@ declare <vscale x 2 x double> @llvm.sqrt.v2f64(<vscale x 2 x double>)
define <vscale x 2 x double> @fadd_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; THRU-LABEL: 'fadd_v2f64'
-; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = fadd <vscale x 2 x double> %a, %b
+; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = fadd <vscale x 2 x double> %a, %b
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 2 x double> %r
;
; LATE-LABEL: 'fadd_v2f64'
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
index ffa2713ac024..37384259e015 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
@@ -11,66 +11,46 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @wrap_mul4(ptr nocapture %Out, ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: @wrap_mul4(
; CHECK-NEXT: [[TEMP:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[TEMP1:%.*]] = load double, ptr [[B:%.*]], align 8
-; CHECK-NEXT: [[MUL_I:%.*]] = fmul double [[TEMP]], [[TEMP1]]
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 0, i64 1
; CHECK-NEXT: [[TEMP2:%.*]] = load double, ptr [[ARRAYIDX5_I]], align 8
-; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 1, i64 0
-; CHECK-NEXT: [[TEMP3:%.*]] = load double, ptr [[ARRAYIDX7_I]], align 8
-; CHECK-NEXT: [[MUL8_I:%.*]] = fmul double [[TEMP2]], [[TEMP3]]
-; CHECK-NEXT: [[ADD_I:%.*]] = fadd double [[MUL_I]], [[MUL8_I]]
-; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 0, i64 1
-; CHECK-NEXT: [[TEMP4:%.*]] = load double, ptr [[ARRAYIDX13_I]], align 8
-; CHECK-NEXT: [[MUL14_I:%.*]] = fmul double [[TEMP]], [[TEMP4]]
-; CHECK-NEXT: [[ARRAYIDX18_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 1, i64 1
-; CHECK-NEXT: [[TEMP5:%.*]] = load double, ptr [[ARRAYIDX18_I]], align 8
-; CHECK-NEXT: [[MUL19_I:%.*]] = fmul double [[TEMP2]], [[TEMP5]]
-; CHECK-NEXT: [[ADD20_I:%.*]] = fadd double [[MUL14_I]], [[MUL19_I]]
+; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B:%.*]], i64 1, i64 0
; CHECK-NEXT: [[ARRAYIDX25_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 0, i64 2
-; CHECK-NEXT: [[TEMP6:%.*]] = load double, ptr [[ARRAYIDX25_I]], align 8
-; CHECK-NEXT: [[MUL26_I:%.*]] = fmul double [[TEMP]], [[TEMP6]]
; CHECK-NEXT: [[ARRAYIDX30_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 1, i64 2
-; CHECK-NEXT: [[TEMP7:%.*]] = load double, ptr [[ARRAYIDX30_I]], align 8
-; CHECK-NEXT: [[MUL31_I:%.*]] = fmul double [[TEMP2]], [[TEMP7]]
-; CHECK-NEXT: [[ADD32_I:%.*]] = fadd double [[MUL26_I]], [[MUL31_I]]
-; CHECK-NEXT: [[ARRAYIDX37_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 0, i64 3
-; CHECK-NEXT: [[TEMP8:%.*]] = load double, ptr [[ARRAYIDX37_I]], align 8
-; CHECK-NEXT: [[MUL38_I:%.*]] = fmul double [[TEMP]], [[TEMP8]]
-; CHECK-NEXT: [[ARRAYIDX42_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 1, i64 3
-; CHECK-NEXT: [[TEMP9:%.*]] = load double, ptr [[ARRAYIDX42_I]], align 8
-; CHECK-NEXT: [[MUL43_I:%.*]] = fmul double [[TEMP2]], [[TEMP9]]
-; CHECK-NEXT: [[ADD44_I:%.*]] = fadd double [[MUL38_I]], [[MUL43_I]]
; CHECK-NEXT: [[ARRAYIDX47_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 1, i64 0
; CHECK-NEXT: [[TEMP10:%.*]] = load double, ptr [[ARRAYIDX47_I]], align 8
-; CHECK-NEXT: [[MUL50_I:%.*]] = fmul double [[TEMP1]], [[TEMP10]]
; CHECK-NEXT: [[ARRAYIDX52_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 1, i64 1
; CHECK-NEXT: [[TEMP11:%.*]] = load double, ptr [[ARRAYIDX52_I]], align 8
-; CHECK-NEXT: [[MUL55_I:%.*]] = fmul double [[TEMP3]], [[TEMP11]]
-; CHECK-NEXT: [[ADD56_I:%.*]] = fadd double [[MUL50_I]], [[MUL55_I]]
-; CHECK-NEXT: [[MUL62_I:%.*]] = fmul double [[TEMP4]], [[TEMP10]]
-; CHECK-NEXT: [[MUL67_I:%.*]] = fmul double [[TEMP5]], [[TEMP11]]
-; CHECK-NEXT: [[ADD68_I:%.*]] = fadd double [[MUL62_I]], [[MUL67_I]]
-; CHECK-NEXT: [[MUL74_I:%.*]] = fmul double [[TEMP6]], [[TEMP10]]
-; CHECK-NEXT: [[MUL79_I:%.*]] = fmul double [[TEMP7]], [[TEMP11]]
-; CHECK-NEXT: [[ADD80_I:%.*]] = fadd double [[MUL74_I]], [[MUL79_I]]
-; CHECK-NEXT: [[MUL86_I:%.*]] = fmul double [[TEMP8]], [[TEMP10]]
-; CHECK-NEXT: [[MUL91_I:%.*]] = fmul double [[TEMP9]], [[TEMP11]]
-; CHECK-NEXT: [[ADD92_I:%.*]] = fadd double [[MUL86_I]], [[MUL91_I]]
-; CHECK-NEXT: store double [[ADD_I]], ptr [[OUT:%.*]], align 8
-; CHECK-NEXT: [[RES_I_SROA_4_0_OUT2_I_SROA_IDX2:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 1
-; CHECK-NEXT: store double [[ADD20_I]], ptr [[RES_I_SROA_4_0_OUT2_I_SROA_IDX2]], align 8
-; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 2
-; CHECK-NEXT: store double [[ADD32_I]], ptr [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]], align 8
-; CHECK-NEXT: [[RES_I_SROA_6_0_OUT2_I_SROA_IDX6:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 3
-; CHECK-NEXT: store double [[ADD44_I]], ptr [[RES_I_SROA_6_0_OUT2_I_SROA_IDX6]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX7_I]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], i64 2
+; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, ptr [[ARRAYIDX25_I]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, ptr [[ARRAYIDX30_I]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP13]], [[TMP16]]
+; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[OUT]], align 8
+; CHECK-NEXT: store <2 x double> [[TMP17]], ptr [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]], align 8
; CHECK-NEXT: [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 4
-; CHECK-NEXT: store double [[ADD56_I]], ptr [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]], align 8
-; CHECK-NEXT: [[RES_I_SROA_8_0_OUT2_I_SROA_IDX10:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 5
-; CHECK-NEXT: store double [[ADD68_I]], ptr [[RES_I_SROA_8_0_OUT2_I_SROA_IDX10]], align 8
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0
+; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE4]]
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0
+; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE5]]
+; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[TMP20]], [[TMP22]]
+; CHECK-NEXT: store <2 x double> [[TMP23]], ptr [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]], align 8
; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 6
-; CHECK-NEXT: store double [[ADD80_I]], ptr [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]], align 8
-; CHECK-NEXT: [[RES_I_SROA_10_0_OUT2_I_SROA_IDX14:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 7
-; CHECK-NEXT: store double [[ADD92_I]], ptr [[RES_I_SROA_10_0_OUT2_I_SROA_IDX14]], align 8
+; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[TMP12]], [[SHUFFLE4]]
+; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP15]], [[SHUFFLE5]]
+; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP25]], [[TMP26]]
+; CHECK-NEXT: store <2 x double> [[TMP27]], ptr [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]], align 8
; CHECK-NEXT: ret void
;
%temp = load double, ptr %A, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
index 219496fc1ac9..3decb7e42e24 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
@@ -7,7 +7,7 @@ define void @f(ptr %r, ptr %w) {
%add0 = fadd double %f0, %f0
%add1 = fadd double %f1, %f1
%w1 = getelementptr inbounds double, ptr %w, i64 1
-; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -3 and with tree size 3
+; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -4 and with tree size 3
store double %add0, ptr %w, !dbg !9
store double %add1, ptr %w1
ret void
More information about the llvm-commits
mailing list