[llvm] [CostModel][X86] Treat lrint/llrint as fptosi calls (PR #90883)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 09:41:18 PDT 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/90883
>From abaa3cf93eee4d00349538c2f7f6d16efcb32aaa Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 2 May 2024 18:54:13 +0100
Subject: [PATCH] [CostModel][X86] Treat lrint/llrint as fptosi calls
X86 can use the CVTP2SI instructions to lower lrint/llrint calls, which have the same costs as the CVTTP2SI (fptosi) instructions
Followup to #90065
---
.../lib/Target/X86/X86TargetTransformInfo.cpp | 8 +
.../CostModel/X86/arith-fp-codesize.ll | 89 ++---
.../CostModel/X86/arith-fp-latency.ll | 170 +++------
.../CostModel/X86/arith-fp-sizelatency.ll | 170 +++------
llvm/test/Analysis/CostModel/X86/arith-fp.ll | 195 +++++-----
.../SLPVectorizer/X86/arith-fp-call.ll | 358 ++++++++++++------
6 files changed, 506 insertions(+), 484 deletions(-)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 6c174f19b10707..c2f80cab537c4e 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4153,6 +4153,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
}
break;
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
+ if (!ICA.isTypeBasedOnly()) {
+ const SmallVectorImpl<Type *> &ArgTys = ICA.getArgTypes();
+ return getCastInstrCost(Instruction::FPToSI, RetTy, ArgTys[0],
+ TTI::CastContextHint::None, CostKind);
+ }
+ break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
// FMINNUM has same costs so don't duplicate.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
index 5247ef0cbd707b..b965a726262e68 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
@@ -666,35 +666,46 @@ define i32 @rint(i32 %arg) {
define i32 @lrint(i32 %arg) {
; SSE1-LABEL: 'lrint'
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE2-LABEL: 'lrint'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX-LABEL: 'lrint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'lrint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i32 @llvm.lrint.i32.f32(float undef)
@@ -711,38 +722,16 @@ define i32 @lrint(i32 %arg) {
}
define i32 @llrint(i32 %arg) {
-; SSE1-LABEL: 'llrint'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'llrint'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512-LABEL: 'llrint'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-LABEL: 'llrint'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i64 @llvm.llrint.i64.f32(float undef)
%V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
index 3f4ecdf340d81b..c147bd2eef6e7e 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -1106,79 +1106,79 @@ define i32 @rint(i32 %arg) {
define i32 @lrint(i32 %arg) {
; SSE1-LABEL: 'lrint'
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'lrint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'lrint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'lrint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'lrint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'lrint'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'lrint'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i32 @llvm.lrint.i32.f32(float undef)
@@ -1195,82 +1195,16 @@ define i32 @lrint(i32 %arg) {
}
define i32 @llrint(i32 %arg) {
-; SSE1-LABEL: 'llrint'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE2-LABEL: 'llrint'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'llrint'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'llrint'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512-LABEL: 'llrint'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'llrint'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; GLM-LABEL: 'llrint'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-LABEL: 'llrint'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i64 @llvm.llrint.i64.f32(float undef)
%V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
index 8040675bef9c5c..d9312ac05601de 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -1040,79 +1040,79 @@ define i32 @rint(i32 %arg) {
define i32 @lrint(i32 %arg) {
; SSE1-LABEL: 'lrint'
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'lrint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'lrint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'lrint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'lrint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'lrint'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'lrint'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i32 @llvm.lrint.i32.f32(float undef)
@@ -1129,82 +1129,16 @@ define i32 @lrint(i32 %arg) {
}
define i32 @llrint(i32 %arg) {
-; SSE1-LABEL: 'llrint'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE2-LABEL: 'llrint'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'llrint'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX-LABEL: 'llrint'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512-LABEL: 'llrint'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'llrint'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; GLM-LABEL: 'llrint'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-LABEL: 'llrint'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call i64 @llvm.llrint.i64.f32(float undef)
%V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index 17bd223affa1d0..90871e3a3831c0 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -1183,79 +1183,90 @@ define i32 @rint(i32 %arg) {
define i32 @lrint(i32 %arg) {
; SSE1-LABEL: 'lrint'
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE2-LABEL: 'lrint'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'lrint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX-LABEL: 'lrint'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX1-LABEL: 'lrint'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'lrint'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'lrint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'lrint'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'lrint'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F32 = call i32 @llvm.lrint.i32.f32(float undef)
@@ -1274,79 +1285,79 @@ define i32 @lrint(i32 %arg) {
define i32 @llrint(i32 %arg) {
; SSE1-LABEL: 'llrint'
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE2-LABEL: 'llrint'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'llrint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'llrint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'llrint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'llrint'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'llrint'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%F32 = call i64 @llvm.llrint.i64.f32(float undef)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-call.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-call.ll
index 413b3a70ebbb18..0bd152e18fb495 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-call.ll
@@ -3,8 +3,8 @@
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 -mattr=+prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v2 -mattr=-prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 -mattr=+prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC128
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 -mattr=-prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC256
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 -mattr=+prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC256
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v3 -mattr=-prefer-128-bit | FileCheck %s --check-prefixes=CHECK,VEC256,VEC256-AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 -mattr=+prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC256,VEC256-AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S -mcpu=x86-64-v4 -mattr=-prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC512
@f64 = common global [16 x double] zeroinitializer, align 64
@@ -174,32 +174,35 @@ define void @rint_v8f64_v8f64() {
}
define void @lrint_v8f32_v8i32() {
-; CHECK-LABEL: @lrint_v8f32_v8i32(
-; CHECK-NEXT: [[A0:%.*]] = load float, ptr @f32, align 8
-; CHECK-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
-; CHECK-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
-; CHECK-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
-; CHECK-NEXT: [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
-; CHECK-NEXT: [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
-; CHECK-NEXT: [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
-; CHECK-NEXT: [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
-; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A0]])
-; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A1]])
-; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A2]])
-; CHECK-NEXT: [[R3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A3]])
-; CHECK-NEXT: [[R4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A4]])
-; CHECK-NEXT: [[R5:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A5]])
-; CHECK-NEXT: [[R6:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A6]])
-; CHECK-NEXT: [[R7:%.*]] = call i32 @llvm.lrint.i32.f32(float [[A7]])
-; CHECK-NEXT: store i32 [[R0]], ptr @r32, align 8
-; CHECK-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 1), align 8
-; CHECK-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 2), align 8
-; CHECK-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 3), align 8
-; CHECK-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
-; CHECK-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 5), align 8
-; CHECK-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 6), align 8
-; CHECK-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 7), align 8
-; CHECK-NEXT: ret void
+; SCALAR-LABEL: @lrint_v8f32_v8i32(
+; SCALAR-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
+; SCALAR-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP1]])
+; SCALAR-NEXT: store <4 x i32> [[TMP2]], ptr @r32, align 8
+; SCALAR-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; SCALAR-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP3]])
+; SCALAR-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
+; SCALAR-NEXT: ret void
+;
+; VEC128-LABEL: @lrint_v8f32_v8i32(
+; VEC128-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
+; VEC128-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP1]])
+; VEC128-NEXT: store <4 x i32> [[TMP2]], ptr @r32, align 8
+; VEC128-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; VEC128-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP3]])
+; VEC128-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
+; VEC128-NEXT: ret void
+;
+; VEC256-LABEL: @lrint_v8f32_v8i32(
+; VEC256-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
+; VEC256-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP1]])
+; VEC256-NEXT: store <8 x i32> [[TMP2]], ptr @r32, align 8
+; VEC256-NEXT: ret void
+;
+; VEC512-LABEL: @lrint_v8f32_v8i32(
+; VEC512-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
+; VEC512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP1]])
+; VEC512-NEXT: store <8 x i32> [[TMP2]], ptr @r32, align 8
+; VEC512-NEXT: ret void
;
%a0 = load float, ptr @f32, align 8
%a1 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
@@ -229,32 +232,35 @@ define void @lrint_v8f32_v8i32() {
}
define void @lrint_v8f64_v8i32() {
-; CHECK-LABEL: @lrint_v8f64_v8i32(
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr @f64, align 8
-; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
-; CHECK-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
-; CHECK-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
-; CHECK-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
-; CHECK-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
-; CHECK-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
-; CHECK-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
-; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A0]])
-; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A1]])
-; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A2]])
-; CHECK-NEXT: [[R3:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A3]])
-; CHECK-NEXT: [[R4:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A4]])
-; CHECK-NEXT: [[R5:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A5]])
-; CHECK-NEXT: [[R6:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A6]])
-; CHECK-NEXT: [[R7:%.*]] = call i32 @llvm.lrint.i32.f64(double [[A7]])
-; CHECK-NEXT: store i32 [[R0]], ptr @r32, align 8
-; CHECK-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 1), align 8
-; CHECK-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 2), align 8
-; CHECK-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 3), align 8
-; CHECK-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
-; CHECK-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 5), align 8
-; CHECK-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 6), align 8
-; CHECK-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 7), align 8
-; CHECK-NEXT: ret void
+; SCALAR-LABEL: @lrint_v8f64_v8i32(
+; SCALAR-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
+; SCALAR-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP1]])
+; SCALAR-NEXT: store <4 x i32> [[TMP2]], ptr @r32, align 8
+; SCALAR-NEXT: [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; SCALAR-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP3]])
+; SCALAR-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
+; SCALAR-NEXT: ret void
+;
+; VEC128-LABEL: @lrint_v8f64_v8i32(
+; VEC128-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
+; VEC128-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP1]])
+; VEC128-NEXT: store <4 x i32> [[TMP2]], ptr @r32, align 8
+; VEC128-NEXT: [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; VEC128-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> [[TMP3]])
+; VEC128-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @r32, i32 0, i32 4), align 8
+; VEC128-NEXT: ret void
+;
+; VEC256-LABEL: @lrint_v8f64_v8i32(
+; VEC256-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
+; VEC256-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> [[TMP1]])
+; VEC256-NEXT: store <8 x i32> [[TMP2]], ptr @r32, align 8
+; VEC256-NEXT: ret void
+;
+; VEC512-LABEL: @lrint_v8f64_v8i32(
+; VEC512-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
+; VEC512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> [[TMP1]])
+; VEC512-NEXT: store <8 x i32> [[TMP2]], ptr @r32, align 8
+; VEC512-NEXT: ret void
;
%a0 = load double, ptr @f64, align 8
%a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
@@ -284,32 +290,101 @@ define void @lrint_v8f64_v8i32() {
}
define void @llrint_v8f32_v8i64() {
-; CHECK-LABEL: @llrint_v8f32_v8i64(
-; CHECK-NEXT: [[A0:%.*]] = load float, ptr @f32, align 8
-; CHECK-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
-; CHECK-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
-; CHECK-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
-; CHECK-NEXT: [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
-; CHECK-NEXT: [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
-; CHECK-NEXT: [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
-; CHECK-NEXT: [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
-; CHECK-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
-; CHECK-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
-; CHECK-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
-; CHECK-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
-; CHECK-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
-; CHECK-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
-; CHECK-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
-; CHECK-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
-; CHECK-NEXT: store i64 [[R0]], ptr @r64, align 8
-; CHECK-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
-; CHECK-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
-; CHECK-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
-; CHECK-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
-; CHECK-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
-; CHECK-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
-; CHECK-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
-; CHECK-NEXT: ret void
+; SCALAR-LABEL: @llrint_v8f32_v8i64(
+; SCALAR-NEXT: [[A0:%.*]] = load float, ptr @f32, align 8
+; SCALAR-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
+; SCALAR-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
+; SCALAR-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
+; SCALAR-NEXT: [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; SCALAR-NEXT: [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
+; SCALAR-NEXT: [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
+; SCALAR-NEXT: [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
+; SCALAR-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
+; SCALAR-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
+; SCALAR-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
+; SCALAR-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
+; SCALAR-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
+; SCALAR-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
+; SCALAR-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
+; SCALAR-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
+; SCALAR-NEXT: store i64 [[R0]], ptr @r64, align 8
+; SCALAR-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; SCALAR-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; SCALAR-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; SCALAR-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; SCALAR-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; SCALAR-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; SCALAR-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; SCALAR-NEXT: ret void
+;
+; VEC128-LABEL: @llrint_v8f32_v8i64(
+; VEC128-NEXT: [[A0:%.*]] = load float, ptr @f32, align 8
+; VEC128-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
+; VEC128-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
+; VEC128-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
+; VEC128-NEXT: [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; VEC128-NEXT: [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
+; VEC128-NEXT: [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
+; VEC128-NEXT: [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
+; VEC128-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
+; VEC128-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
+; VEC128-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
+; VEC128-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
+; VEC128-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
+; VEC128-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
+; VEC128-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
+; VEC128-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
+; VEC128-NEXT: store i64 [[R0]], ptr @r64, align 8
+; VEC128-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; VEC128-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; VEC128-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; VEC128-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC128-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; VEC128-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; VEC128-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; VEC128-NEXT: ret void
+;
+; VEC256-AVX2-LABEL: @llrint_v8f32_v8i64(
+; VEC256-AVX2-NEXT: [[A0:%.*]] = load float, ptr @f32, align 8
+; VEC256-AVX2-NEXT: [[A1:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
+; VEC256-AVX2-NEXT: [[A2:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 2), align 8
+; VEC256-AVX2-NEXT: [[A3:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 3), align 8
+; VEC256-AVX2-NEXT: [[A4:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; VEC256-AVX2-NEXT: [[A5:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 5), align 8
+; VEC256-AVX2-NEXT: [[A6:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 6), align 8
+; VEC256-AVX2-NEXT: [[A7:%.*]] = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 7), align 8
+; VEC256-AVX2-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A0]])
+; VEC256-AVX2-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A1]])
+; VEC256-AVX2-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A2]])
+; VEC256-AVX2-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A3]])
+; VEC256-AVX2-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A4]])
+; VEC256-AVX2-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A5]])
+; VEC256-AVX2-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A6]])
+; VEC256-AVX2-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[A7]])
+; VEC256-AVX2-NEXT: store i64 [[R0]], ptr @r64, align 8
+; VEC256-AVX2-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; VEC256-AVX2-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; VEC256-AVX2-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; VEC256-AVX2-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC256-AVX2-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; VEC256-AVX2-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; VEC256-AVX2-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; VEC256-AVX2-NEXT: ret void
+;
+; VEC256-AVX512-LABEL: @llrint_v8f32_v8i64(
+; VEC256-AVX512-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr @f32, align 8
+; VEC256-AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP1]])
+; VEC256-AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @r64, align 8
+; VEC256-AVX512-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 4), align 8
+; VEC256-AVX512-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP3]])
+; VEC256-AVX512-NEXT: store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC256-AVX512-NEXT: ret void
+;
+; VEC512-LABEL: @llrint_v8f32_v8i64(
+; VEC512-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr @f32, align 8
+; VEC512-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> [[TMP1]])
+; VEC512-NEXT: store <8 x i64> [[TMP2]], ptr @r64, align 8
+; VEC512-NEXT: ret void
;
%a0 = load float, ptr @f32, align 8
%a1 = load float, ptr getelementptr inbounds ([8 x float], ptr @f32, i32 0, i32 1), align 8
@@ -339,32 +414,101 @@ define void @llrint_v8f32_v8i64() {
}
define void @llrint_v8f64_v8i64() {
-; CHECK-LABEL: @llrint_v8f64_v8i64(
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr @f64, align 8
-; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
-; CHECK-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
-; CHECK-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
-; CHECK-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
-; CHECK-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
-; CHECK-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
-; CHECK-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
-; CHECK-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
-; CHECK-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
-; CHECK-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
-; CHECK-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
-; CHECK-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
-; CHECK-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
-; CHECK-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
-; CHECK-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
-; CHECK-NEXT: store i64 [[R0]], ptr @r64, align 8
-; CHECK-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
-; CHECK-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
-; CHECK-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
-; CHECK-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
-; CHECK-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
-; CHECK-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
-; CHECK-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
-; CHECK-NEXT: ret void
+; SCALAR-LABEL: @llrint_v8f64_v8i64(
+; SCALAR-NEXT: [[A0:%.*]] = load double, ptr @f64, align 8
+; SCALAR-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
+; SCALAR-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
+; SCALAR-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
+; SCALAR-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; SCALAR-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
+; SCALAR-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
+; SCALAR-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
+; SCALAR-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
+; SCALAR-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
+; SCALAR-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
+; SCALAR-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
+; SCALAR-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
+; SCALAR-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
+; SCALAR-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
+; SCALAR-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
+; SCALAR-NEXT: store i64 [[R0]], ptr @r64, align 8
+; SCALAR-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; SCALAR-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; SCALAR-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; SCALAR-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; SCALAR-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; SCALAR-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; SCALAR-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; SCALAR-NEXT: ret void
+;
+; VEC128-LABEL: @llrint_v8f64_v8i64(
+; VEC128-NEXT: [[A0:%.*]] = load double, ptr @f64, align 8
+; VEC128-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
+; VEC128-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
+; VEC128-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
+; VEC128-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; VEC128-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
+; VEC128-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
+; VEC128-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
+; VEC128-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
+; VEC128-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
+; VEC128-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
+; VEC128-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
+; VEC128-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
+; VEC128-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
+; VEC128-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
+; VEC128-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
+; VEC128-NEXT: store i64 [[R0]], ptr @r64, align 8
+; VEC128-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; VEC128-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; VEC128-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; VEC128-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC128-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; VEC128-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; VEC128-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; VEC128-NEXT: ret void
+;
+; VEC256-AVX2-LABEL: @llrint_v8f64_v8i64(
+; VEC256-AVX2-NEXT: [[A0:%.*]] = load double, ptr @f64, align 8
+; VEC256-AVX2-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
+; VEC256-AVX2-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 2), align 8
+; VEC256-AVX2-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 3), align 8
+; VEC256-AVX2-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; VEC256-AVX2-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 5), align 8
+; VEC256-AVX2-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 6), align 8
+; VEC256-AVX2-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 7), align 8
+; VEC256-AVX2-NEXT: [[R0:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A0]])
+; VEC256-AVX2-NEXT: [[R1:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A1]])
+; VEC256-AVX2-NEXT: [[R2:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A2]])
+; VEC256-AVX2-NEXT: [[R3:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A3]])
+; VEC256-AVX2-NEXT: [[R4:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A4]])
+; VEC256-AVX2-NEXT: [[R5:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A5]])
+; VEC256-AVX2-NEXT: [[R6:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A6]])
+; VEC256-AVX2-NEXT: [[R7:%.*]] = call i64 @llvm.llrint.i64.f64(double [[A7]])
+; VEC256-AVX2-NEXT: store i64 [[R0]], ptr @r64, align 8
+; VEC256-AVX2-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 1), align 8
+; VEC256-AVX2-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 2), align 8
+; VEC256-AVX2-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 3), align 8
+; VEC256-AVX2-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC256-AVX2-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 5), align 8
+; VEC256-AVX2-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 6), align 8
+; VEC256-AVX2-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
+; VEC256-AVX2-NEXT: ret void
+;
+; VEC256-AVX512-LABEL: @llrint_v8f64_v8i64(
+; VEC256-AVX512-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @f64, align 8
+; VEC256-AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> [[TMP1]])
+; VEC256-AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @r64, align 8
+; VEC256-AVX512-NEXT: [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 4), align 8
+; VEC256-AVX512-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> [[TMP3]])
+; VEC256-AVX512-NEXT: store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 4), align 8
+; VEC256-AVX512-NEXT: ret void
+;
+; VEC512-LABEL: @llrint_v8f64_v8i64(
+; VEC512-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @f64, align 8
+; VEC512-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> [[TMP1]])
+; VEC512-NEXT: store <8 x i64> [[TMP2]], ptr @r64, align 8
+; VEC512-NEXT: ret void
;
%a0 = load double, ptr @f64, align 8
%a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @f64, i32 0, i32 1), align 8
@@ -392,3 +536,5 @@ define void @llrint_v8f64_v8i64() {
store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @r64, i32 0, i32 7), align 8
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
More information about the llvm-commits
mailing list