[llvm] [CostModel][X86] merge integer multiply costs tests using -cost-kind=all (PR #131864)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 10:28:44 PDT 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/131864
None
From 4b885f09501920cc8c0f3dd5b8a25e48f0b2fac6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 18 Mar 2025 17:23:20 +0000
Subject: [PATCH] [CostModel][X86] merge integer multiply costs tests using
 -cost-kind=all
---
.../Analysis/CostModel/X86/mul-codesize.ll | 716 ----
.../Analysis/CostModel/X86/mul-latency.ll | 716 ----
.../Analysis/CostModel/X86/mul-sizelatency.ll | 716 ----
llvm/test/Analysis/CostModel/X86/mul.ll | 1110 +++---
llvm/test/Analysis/CostModel/X86/mul32.ll | 1656 ++++-----
llvm/test/Analysis/CostModel/X86/mul64.ll | 3218 ++++++++---------
6 files changed, 2992 insertions(+), 5140 deletions(-)
delete mode 100644 llvm/test/Analysis/CostModel/X86/mul-codesize.ll
delete mode 100644 llvm/test/Analysis/CostModel/X86/mul-latency.ll
delete mode 100644 llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
diff --git a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll
deleted file mode 100644
index 75585cf0a6e85..0000000000000
--- a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll
+++ /dev/null
@@ -1,716 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefix=SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefix=AVX1
-
-define i32 @mul_constpow2() {
-; SSE2-LABEL: 'mul_constpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
- %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstpow2() {
-; SSE2-LABEL: 'mul_uniformconstpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
- %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_constnegpow2() {
-; SSE2-LABEL: 'mul_constnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
- %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstnegpow2() {
-; SSE2-LABEL: 'mul_uniformconstnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
- %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-
- ret i32 undef
-}
diff --git a/llvm/test/Analysis/CostModel/X86/mul-latency.ll b/llvm/test/Analysis/CostModel/X86/mul-latency.ll
deleted file mode 100644
index 9245f2c5740f3..0000000000000
--- a/llvm/test/Analysis/CostModel/X86/mul-latency.ll
+++ /dev/null
@@ -1,716 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefix=SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefix=AVX1
-
-define i32 @mul_constpow2() {
-; SSE2-LABEL: 'mul_constpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
- %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstpow2() {
-; SSE2-LABEL: 'mul_uniformconstpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
- %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_constnegpow2() {
-; SSE2-LABEL: 'mul_constnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
- %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstnegpow2() {
-; SSE2-LABEL: 'mul_uniformconstnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
- %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-
- ret i32 undef
-}
diff --git a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
deleted file mode 100644
index 2d1bf23e9699c..0000000000000
--- a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
+++ /dev/null
@@ -1,716 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefix=SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefix=AVX1
-
-define i32 @mul_constpow2() {
-; SSE2-LABEL: 'mul_constpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
- %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
- %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstpow2() {
-; SSE2-LABEL: 'mul_uniformconstpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, 16
- %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
- %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
- %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-
- %I32 = mul i32 undef, 16
- %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
- %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
- %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
- %I16 = mul i16 undef, 16
- %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
- %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-
- %I8 = mul i8 undef, 16
- %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
- %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-
- ret i32 undef
-}
-
-define i32 @mul_constnegpow2() {
-; SSE2-LABEL: 'mul_constnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_constnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_constnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_constnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_constnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_constnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_constnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_constnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
- %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
- %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-
- ret i32 undef
-}
-
-define i32 @mul_uniformconstnegpow2() {
-; SSE2-LABEL: 'mul_uniformconstnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'mul_uniformconstnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'mul_uniformconstnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX1-LABEL: 'mul_uniformconstnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'mul_uniformconstnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512F-LABEL: 'mul_uniformconstnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'mul_uniformconstnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
- %I64 = mul i64 undef, -16
- %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
- %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
- %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
-
- %I32 = mul i32 undef, -16
- %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
- %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
- %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-
- %I16 = mul i16 undef, -16
- %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
- %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-
- %I8 = mul i8 undef, -16
- %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
- %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-
- ret i32 undef
-}
diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll
index 4602928a981ea..71bf0d6cd1fb5 100644
--- a/llvm/test/Analysis/CostModel/X86/mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul.ll
@@ -1,168 +1,168 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefix=SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefix=SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefix=AVX1
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx | FileCheck %s --check-prefix=AVX1
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
+;
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=slm | FileCheck %s --check-prefix=SLM
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=goldmont | FileCheck %s --check-prefix=SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=btver2 | FileCheck %s --check-prefix=AVX1
define i32 @mul_constpow2() {
; SSE2-LABEL: 'mul_constpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:7 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:14 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:24 SizeLat:28 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:26 Lat:21 SizeLat:28 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:52 Lat:42 SizeLat:56 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:52 CodeSize:104 Lat:84 SizeLat:112 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSSE3-LABEL: 'mul_constpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:7 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:14 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:24 SizeLat:28 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:13 CodeSize:26 Lat:21 SizeLat:28 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:26 CodeSize:52 Lat:42 SizeLat:56 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:52 CodeSize:104 Lat:84 SizeLat:112 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSE42-LABEL: 'mul_constpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:7 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:14 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:24 SizeLat:28 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:24 SizeLat:22 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:30 CodeSize:34 Lat:48 SizeLat:44 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:60 CodeSize:68 Lat:96 SizeLat:88 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX1-LABEL: 'mul_constpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:6 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:7 SizeLat:15 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:14 SizeLat:30 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:3 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:10 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:16 SizeLat:20 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:6 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:16 SizeLat:12 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:11 Lat:21 SizeLat:17 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:27 Lat:22 SizeLat:40 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:44 CodeSize:54 Lat:44 SizeLat:80 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX2-LABEL: 'mul_constpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:2 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:4 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:3 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:6 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:21 SizeLat:16 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:23 SizeLat:22 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:46 SizeLat:44 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512F-LABEL: 'mul_constpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:11 Lat:16 SizeLat:15 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:21 SizeLat:16 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:23 SizeLat:22 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512BW-LABEL: 'mul_constpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:5 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:11 Lat:23 SizeLat:16 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:13 Lat:19 SizeLat:15 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SLM-LABEL: 'mul_constpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:7 for: %V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:14 for: %V4i64 = mul <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:24 SizeLat:28 for: %V8i64 = mul <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:11 SizeLat:7 for: %V4i32 = mul <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:2 Lat:22 SizeLat:14 for: %V8i32 = mul <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:44 SizeLat:28 for: %V16i32 = mul <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SLM-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:24 SizeLat:22 for: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:30 CodeSize:34 Lat:48 SizeLat:44 for: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:60 CodeSize:68 Lat:96 SizeLat:88 for: %V64i8 = mul <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = mul i64 undef, 16
%V2i64 = mul <2 x i64> undef, <i64 8, i64 16>
@@ -189,156 +189,156 @@ define i32 @mul_constpow2() {
define i32 @mul_uniformconstpow2() {
; SSE2-LABEL: 'mul_uniformconstpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:14 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:28 SizeLat:12 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSSE3-LABEL: 'mul_uniformconstpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:14 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:28 SizeLat:12 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSE42-LABEL: 'mul_uniformconstpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:14 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:28 SizeLat:12 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX1-LABEL: 'mul_uniformconstpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:6 SizeLat:5 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:12 SizeLat:10 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:6 SizeLat:5 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:12 SizeLat:10 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:6 SizeLat:5 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:12 SizeLat:10 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:7 SizeLat:8 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:14 SizeLat:16 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX2-LABEL: 'mul_uniformconstpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:8 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:8 SizeLat:4 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:16 SizeLat:8 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512F-LABEL: 'mul_uniformconstpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:7 SizeLat:4 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:8 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:8 SizeLat:4 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:5 Lat:12 SizeLat:6 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512BW-LABEL: 'mul_uniformconstpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:8 SizeLat:3 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:8 SizeLat:3 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SLM-LABEL: 'mul_uniformconstpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I64 = mul i64 undef, 16
+; SLM-NEXT: Cost Model: Found costs of 1 for: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLM-NEXT: Cost Model: Found costs of 4 for: %V8i64 = mul <8 x i64> undef, splat (i64 16)
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I32 = mul i32 undef, 16
+; SLM-NEXT: Cost Model: Found costs of 1 for: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLM-NEXT: Cost Model: Found costs of 4 for: %V16i32 = mul <16 x i32> undef, splat (i32 16)
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I16 = mul i16 undef, 16
+; SLM-NEXT: Cost Model: Found costs of 1 for: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLM-NEXT: Cost Model: Found costs of 4 for: %V32i16 = mul <32 x i16> undef, splat (i16 16)
+; SLM-NEXT: Cost Model: Found costs of 1 for: %I8 = mul i8 undef, 16
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:7 SizeLat:3 for: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:14 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:28 SizeLat:12 for: %V64i8 = mul <64 x i8> undef, splat (i8 16)
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = mul i64 undef, 16
%V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
@@ -365,156 +365,156 @@ define i32 @mul_uniformconstpow2() {
define i32 @mul_constnegpow2() {
; SSE2-LABEL: 'mul_constnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:8 SizeLat:9 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:16 SizeLat:18 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:32 SizeLat:36 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:9 SizeLat:8 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:17 SizeLat:15 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:33 SizeLat:29 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:11 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:21 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:27 Lat:22 SizeLat:29 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:53 Lat:43 SizeLat:57 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:105 Lat:85 SizeLat:113 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSSE3-LABEL: 'mul_constnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:8 SizeLat:9 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:16 SizeLat:18 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:32 SizeLat:36 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:9 SizeLat:8 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:15 Lat:17 SizeLat:15 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:29 Lat:33 SizeLat:29 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:11 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:21 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:27 Lat:22 SizeLat:29 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:53 Lat:43 SizeLat:57 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:105 Lat:85 SizeLat:113 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSE42-LABEL: 'mul_constnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:8 SizeLat:9 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:16 SizeLat:18 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:32 SizeLat:36 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:12 SizeLat:2 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:23 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:5 Lat:45 SizeLat:5 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:11 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:21 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:18 Lat:25 SizeLat:23 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:35 Lat:49 SizeLat:45 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:69 Lat:97 SizeLat:89 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX1-LABEL: 'mul_constnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:9 SizeLat:21 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:18 SizeLat:42 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:6 SizeLat:4 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:9 CodeSize:10 Lat:10 SizeLat:16 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:18 CodeSize:20 Lat:20 SizeLat:32 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:12 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:24 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:12 Lat:22 SizeLat:18 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:32 Lat:24 SizeLat:46 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:64 Lat:48 SizeLat:92 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX2-LABEL: 'mul_constnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:4 SizeLat:2 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:2 Lat:5 SizeLat:4 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:8 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:4 SizeLat:4 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:2 Lat:5 SizeLat:5 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:10 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:6 SizeLat:4 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:12 SizeLat:8 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:12 Lat:22 SizeLat:17 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:12 Lat:24 SizeLat:24 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:24 Lat:48 SizeLat:48 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512F-LABEL: 'mul_constnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:2 SizeLat:3 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:2 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:6 SizeLat:4 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:23 SizeLat:20 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:12 Lat:22 SizeLat:17 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:12 Lat:24 SizeLat:24 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:18 CodeSize:32 Lat:26 SizeLat:38 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512BW-LABEL: 'mul_constnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:6 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:24 SizeLat:17 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:14 Lat:20 SizeLat:16 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SLM-LABEL: 'mul_constnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:12 Lat:16 SizeLat:18 for: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:24 Lat:32 SizeLat:36 for: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:2 Lat:12 SizeLat:8 for: %V4i32 = mul <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:3 Lat:23 SizeLat:15 for: %V8i32 = mul <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:5 Lat:45 SizeLat:29 for: %V16i32 = mul <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:6 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:11 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:5 Lat:21 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:18 Lat:25 SizeLat:23 for: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:35 Lat:49 SizeLat:45 for: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:64 CodeSize:69 Lat:97 SizeLat:89 for: %V64i8 = mul <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = mul i64 undef, -16
%V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
@@ -541,156 +541,156 @@ define i32 @mul_constnegpow2() {
define i32 @mul_uniformconstnegpow2() {
; SSE2-LABEL: 'mul_uniformconstnegpow2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:6 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:12 SizeLat:12 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:15 SizeLat:7 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:29 SizeLat:13 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSSE3-LABEL: 'mul_uniformconstnegpow2'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:6 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:12 SizeLat:12 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:15 SizeLat:7 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:29 SizeLat:13 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SSE42-LABEL: 'mul_uniformconstnegpow2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:6 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:12 SizeLat:12 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:15 SizeLat:7 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:29 SizeLat:13 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX1-LABEL: 'mul_uniformconstnegpow2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:2 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:8 SizeLat:11 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:16 SizeLat:22 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:8 SizeLat:11 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:16 SizeLat:22 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:8 SizeLat:11 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:16 SizeLat:22 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; AVX1-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:9 SizeLat:14 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:18 SizeLat:28 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX2-LABEL: 'mul_uniformconstnegpow2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:4 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:4 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:2 Lat:3 SizeLat:4 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:8 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; AVX2-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:18 SizeLat:12 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512F-LABEL: 'mul_uniformconstnegpow2'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:2 SizeLat:3 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:2 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:8 SizeLat:6 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:17 SizeLat:12 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:6 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:19 SizeLat:11 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:9 SizeLat:4 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; SLM-LABEL: 'mul_uniformconstnegpow2'
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I64 = mul i64 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:2 Lat:3 SizeLat:3 for: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:6 SizeLat:6 for: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:8 Lat:12 SizeLat:12 for: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I32 = mul i32 undef, -16
+; SLM-NEXT: Cost Model: Found costs of 2 for: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I16 = mul i16 undef, -16
+; SLM-NEXT: Cost Model: Found costs of 2 for: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
+; SLM-NEXT: Cost Model: Found costs of 2 for: %I8 = mul i8 undef, -16
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:8 SizeLat:4 for: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:15 SizeLat:7 for: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:29 SizeLat:13 for: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = mul i64 undef, -16
%V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll
index d50fc41e7b049..41c000aa7d130 100644
--- a/llvm/test/Analysis/CostModel/X86/mul32.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul32.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512F
;
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -22,148 +22,148 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_sext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:3 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:10 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:16 SizeLat:20 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:20 Lat:32 SizeLat:40 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:40 Lat:64 SizeLat:80 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_sext_vXi8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:20 SizeLat:4 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:40 SizeLat:8 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 3 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of 6 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of 12 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of 24 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of 48 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i8> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i8> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i8> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i8> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = sext <4 x i8> %a4 to <4 x i32>
%xb4 = sext <4 x i8> %b4 to <4 x i32>
@@ -185,166 +185,166 @@ define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_zext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_zext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_zext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:6 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:16 SizeLat:12 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:32 SizeLat:24 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:64 SizeLat:48 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_zext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_zext_vXi8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_zext_vXi8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_zext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_zext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i8> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i8> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i8> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i8> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = zext <4 x i8> %a4 to <4 x i32>
%xb4 = zext <4 x i8> %b4 to <4 x i32>
@@ -366,166 +366,166 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_sext_zext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:6 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:16 SizeLat:12 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:32 SizeLat:24 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:64 SizeLat:48 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_zext_vXi8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_zext_vXi8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_zext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i8> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i8> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i8> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i8> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i8> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i8> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i8> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i8> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = sext <4 x i8> %a4 to <4 x i32>
%xb4 = zext <4 x i8> %b4 to <4 x i32>
@@ -551,166 +551,166 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_sext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:6 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:16 SizeLat:12 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:32 SizeLat:24 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:64 SizeLat:48 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_vXi16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_vXi16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = sext <8 x i16> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = sext <16 x i16> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = sext <32 x i16> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = sext <64 x i16> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = sext <4 x i16> %a4 to <4 x i32>
%xb4 = sext <4 x i16> %b4 to <4 x i32>
@@ -732,148 +732,148 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1
define void @mul_zext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_zext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:56 Lat:64 SizeLat:56 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:112 Lat:128 SizeLat:112 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:56 Lat:64 SizeLat:56 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:112 Lat:128 SizeLat:112 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_zext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_zext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:3 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:10 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:16 SizeLat:20 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:20 Lat:32 SizeLat:40 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:40 Lat:64 SizeLat:80 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_zext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_zext_vXi16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:20 SizeLat:4 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:40 SizeLat:8 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_zext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 5 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of 10 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of 20 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of 40 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of 80 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_zext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = zext <8 x i16> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = zext <16 x i16> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = zext <32 x i16> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = zext <64 x i16> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = zext <4 x i16> %a4 to <4 x i32>
%xb4 = zext <4 x i16> %b4 to <4 x i32>
@@ -895,148 +895,148 @@ define void @mul_zext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1
define void @mul_sext_zext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_sext_zext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:56 Lat:64 SizeLat:56 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:112 Lat:128 SizeLat:112 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:8 SizeLat:7 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:16 SizeLat:14 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:32 SizeLat:28 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:56 Lat:64 SizeLat:56 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:112 Lat:128 SizeLat:112 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:5 SizeLat:3 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:10 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:10 Lat:16 SizeLat:20 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:20 Lat:32 SizeLat:40 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:40 Lat:64 SizeLat:80 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:20 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:40 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:80 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_sext_zext_vXi16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:10 SizeLat:2 for: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:20 SizeLat:4 for: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:40 SizeLat:8 for: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_zext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; SLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:11 SizeLat:7 for: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:2 Lat:22 SizeLat:14 for: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:44 SizeLat:28 for: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:88 CodeSize:8 Lat:88 SizeLat:56 for: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:176 CodeSize:16 Lat:176 SizeLat:112 for: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa8 = sext <8 x i16> %a8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb8 = zext <8 x i16> %b8 to <8 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa16 = sext <16 x i16> %a16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb16 = zext <16 x i16> %b16 to <16 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa32 = sext <32 x i16> %a32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb32 = zext <32 x i16> %b32 to <32 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa64 = sext <64 x i16> %a64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb64 = zext <64 x i16> %b64 to <64 x i32>
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:11 SizeLat:1 for: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:22 SizeLat:2 for: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:44 SizeLat:4 for: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:88 SizeLat:8 for: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:176 SizeLat:16 for: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa4 = sext <4 x i16> %a4 to <4 x i32>
%xb4 = zext <4 x i16> %b4 to <4 x i32>
diff --git a/llvm/test/Analysis/CostModel/X86/mul64.ll b/llvm/test/Analysis/CostModel/X86/mul64.ll
index 718972093e8e5..9e4794760404f 100644
--- a/llvm/test/Analysis/CostModel/X86/mul64.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul64.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
-;
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
+;
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512,AVX512F
;
; mul vXi8 -> mXi64
@@ -19,214 +19,214 @@
define void @mul_sext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_sext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_vXi8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_vXi8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_vXi8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i8> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i8> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i8> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i8> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i8> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i8> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i8> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i8> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i8> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i8> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i8> %a2 to <2 x i64>
%xb2 = sext <2 x i8> %b2 to <2 x i64>
@@ -251,172 +251,172 @@ define void @mul_sext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i8> %b
define void @mul_zext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_zext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_zext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_zext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_zext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_zext_vXi8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_zext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_zext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i8> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i8> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i8> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i8> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i8> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i8> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i8> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i8> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i8> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i8> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i8> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = zext <2 x i8> %a2 to <2 x i64>
%xb2 = zext <2 x i8> %b2 to <2 x i64>
@@ -441,214 +441,214 @@ define void @mul_zext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i8> %b
define void @mul_sext_zext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b8, <16 x i8> %a16, <16 x i8> %b16, <32 x i8> %a32, <32 x i8> %b32, <64 x i8> %a64, <64 x i8> %b64) {
; SSE2-LABEL: 'mul_sext_zext_vXi8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:128 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_zext_vXi8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_zext_vXi8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_zext_vXi8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_zext_vXi8'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi8'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i8> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i8> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i8> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i8> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i8> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i8> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i8> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i8> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i8> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i8> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i8> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i8> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i8> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i8> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i8> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i8> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i8> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i8> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i8> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i8> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i8> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i8> %a2 to <2 x i64>
%xb2 = zext <2 x i8> %b2 to <2 x i64>
@@ -677,214 +677,214 @@ define void @mul_sext_zext_vXi8(<2 x i8> %a2, <2 x i8> %b2, <4 x i8> %a4, <4 x i
define void @mul_sext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_sext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_vXi16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_vXi16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_vXi16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i16> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i16> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i16> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i16> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i16> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i16> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i16> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i16> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i16> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i16> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i16> %a2 to <2 x i64>
%xb2 = sext <2 x i16> %b2 to <2 x i64>
@@ -909,172 +909,172 @@ define void @mul_sext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4 x i1
define void @mul_zext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_zext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_zext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_zext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_zext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_zext_vXi16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_zext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_zext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i16> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i16> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i16> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i16> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i16> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i16> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i16> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i16> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i16> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i16> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i16> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i16> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = zext <2 x i16> %a2 to <2 x i64>
%xb2 = zext <2 x i16> %b2 to <2 x i64>
@@ -1099,214 +1099,214 @@ define void @mul_zext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4 x i1
define void @mul_sext_zext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i16> %b8, <16 x i16> %a16, <16 x i16> %b16, <32 x i16> %a32, <32 x i16> %b32, <64 x i16> %a64, <64 x i16> %b64) {
; SSE2-LABEL: 'mul_sext_zext_vXi16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:52 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_zext_vXi16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_zext_vXi16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_zext_vXi16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_zext_vXi16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi16'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i16> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i16> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i16> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i16> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i16> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i16> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i16> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i16> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i16> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i16> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i16> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i16> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i16> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i16> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i16> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i16> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i16> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i16> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i16> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i16> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i16> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i16> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i16> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i16> %a2 to <2 x i64>
%xb2 = zext <2 x i16> %b2 to <2 x i64>
@@ -1335,214 +1335,214 @@ define void @mul_sext_zext_vXi16(<2 x i16> %a2, <2 x i16> %b2, <4 x i16> %a4, <4
define void @mul_sext_vXi32(<2 x i32> %a2, <2 x i32> %b2, <4 x i32> %a4, <4 x i32> %b4, <8 x i32> %a8, <8 x i32> %b8, <16 x i32> %a16, <16 x i32> %b16, <32 x i32> %a32, <32 x i32> %b32, <64 x i32> %a64, <64 x i32> %b64) {
; SSE2-LABEL: 'mul_sext_vXi32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_vXi32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_vXi32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_vXi32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_vXi32'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_vXi32'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_vXi32'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_vXi32'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_vXi32'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = sext <2 x i32> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = sext <4 x i32> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = sext <8 x i32> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = sext <16 x i32> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i32> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i32> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = sext <2 x i32> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = sext <4 x i32> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = sext <8 x i32> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = sext <16 x i32> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = sext <32 x i32> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = sext <64 x i32> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i32> %a2 to <2 x i64>
%xb2 = sext <2 x i32> %b2 to <2 x i64>
@@ -1567,172 +1567,172 @@ define void @mul_sext_vXi32(<2 x i32> %a2, <2 x i32> %b2, <4 x i32> %a4, <4 x i3
define void @mul_zext_vXi32(<2 x i32> %a2, <2 x i32> %b2, <4 x i32> %a4, <4 x i32> %b4, <8 x i32> %a8, <8 x i32> %b8, <16 x i32> %a16, <16 x i32> %b16, <32 x i32> %a32, <32 x i32> %b32, <64 x i32> %a64, <64 x i32> %b64) {
; SSE2-LABEL: 'mul_zext_vXi32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_zext_vXi32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_zext_vXi32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_zext_vXi32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'mul_zext_vXi32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_zext_vXi32'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_zext_vXi32'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = zext <2 x i32> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = zext <4 x i32> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = zext <8 x i32> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = zext <16 x i32> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = zext <32 x i32> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i32> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = zext <2 x i32> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = zext <4 x i32> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = zext <8 x i32> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = zext <16 x i32> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = zext <32 x i32> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = zext <64 x i32> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:5 SizeLat:1 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:10 SizeLat:2 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:20 SizeLat:4 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:40 SizeLat:8 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:80 SizeLat:16 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:160 SizeLat:32 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = zext <2 x i32> %a2 to <2 x i64>
%xb2 = zext <2 x i32> %b2 to <2 x i64>
@@ -1757,214 +1757,214 @@ define void @mul_zext_vXi32(<2 x i32> %a2, <2 x i32> %b2, <4 x i32> %a4, <4 x i3
define void @mul_sext_zext_vXi32(<2 x i32> %a2, <2 x i32> %b2, <4 x i32> %a4, <4 x i32> %b4, <8 x i32> %a8, <8 x i32> %b8, <16 x i32> %a16, <16 x i32> %b16, <32 x i32> %a32, <32 x i32> %b32, <64 x i32> %a64, <64 x i32> %b64) {
; SSE2-LABEL: 'mul_sext_zext_vXi32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSE2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSSE3-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found costs of RThru:28 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found costs of RThru:56 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:112 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found costs of RThru:224 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:19 Lat:15 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:38 Lat:30 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:76 Lat:60 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found costs of RThru:96 CodeSize:152 Lat:120 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found costs of RThru:192 CodeSize:304 Lat:240 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:20 SizeLat:26 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:40 SizeLat:52 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:80 SizeLat:104 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found costs of RThru:96 CodeSize:128 Lat:160 SizeLat:208 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'mul_sext_zext_vXi32'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512F-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512F-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512F-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'mul_sext_zext_vXi32'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:8 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:10 SizeLat:13 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:9 SizeLat:8 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:18 SizeLat:16 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:24 CodeSize:32 Lat:36 SizeLat:32 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:48 CodeSize:64 Lat:72 SizeLat:64 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'mul_sext_zext_vXi32'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res2 = mul <2 x i64> %xa2, %xb2
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:15 SizeLat:3 for: %res4 = mul <4 x i64> %xa4, %xb4
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:15 SizeLat:3 for: %res8 = mul <8 x i64> %xa8, %xb8
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:6 CodeSize:2 Lat:30 SizeLat:6 for: %res16 = mul <16 x i64> %xa16, %xb16
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:60 SizeLat:12 for: %res32 = mul <32 x i64> %xa32, %xb32
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:120 SizeLat:24 for: %res64 = mul <64 x i64> %xa64, %xb64
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'mul_sext_zext_vXi32'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; SLM-NEXT: Cost Model: Found costs of RThru:17 CodeSize:9 Lat:22 SizeLat:9 for: %res2 = mul <2 x i64> %xa2, %xb2
+; SLM-NEXT: Cost Model: Found costs of RThru:34 CodeSize:18 Lat:44 SizeLat:18 for: %res4 = mul <4 x i64> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found costs of RThru:68 CodeSize:36 Lat:88 SizeLat:36 for: %res8 = mul <8 x i64> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found costs of RThru:136 CodeSize:72 Lat:176 SizeLat:72 for: %res16 = mul <16 x i64> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found costs of RThru:272 CodeSize:144 Lat:352 SizeLat:144 for: %res32 = mul <32 x i64> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found costs of RThru:544 CodeSize:288 Lat:704 SizeLat:288 for: %res64 = mul <64 x i64> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi32'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xa2 = sext <2 x i32> %a2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xb2 = zext <2 x i32> %b2 to <2 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xa4 = sext <4 x i32> %a4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xb4 = zext <4 x i32> %b4 to <4 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xa8 = sext <8 x i32> %a8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %xb8 = zext <8 x i32> %b8 to <8 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xa16 = sext <16 x i32> %a16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb16 = zext <16 x i32> %b16 to <16 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa32 = sext <32 x i32> %a32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i32> %b32 to <32 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i32> %a64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i32> %b64 to <64 x i64>
-; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res2 = mul <2 x i64> %xa2, %xb2
-; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = mul <4 x i64> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res8 = mul <8 x i64> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res16 = mul <16 x i64> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res32 = mul <32 x i64> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %res64 = mul <64 x i64> %xa64, %xb64
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xa2 = sext <2 x i32> %a2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 1 for: %xb2 = zext <2 x i32> %b2 to <2 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xa4 = sext <4 x i32> %a4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 2 for: %xb4 = zext <4 x i32> %b4 to <4 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xa8 = sext <8 x i32> %a8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 4 for: %xb8 = zext <8 x i32> %b8 to <8 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xa16 = sext <16 x i32> %a16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 8 for: %xb16 = zext <16 x i32> %b16 to <16 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xa32 = sext <32 x i32> %a32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 16 for: %xb32 = zext <32 x i32> %b32 to <32 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xa64 = sext <64 x i32> %a64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of 32 for: %xb64 = zext <64 x i32> %b64 to <64 x i64>
+; GLM-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:10 SizeLat:10 for: %res2 = mul <2 x i64> %xa2, %xb2
+; GLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:20 SizeLat:20 for: %res4 = mul <4 x i64> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:40 Lat:40 SizeLat:40 for: %res8 = mul <8 x i64> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:80 Lat:80 SizeLat:80 for: %res16 = mul <16 x i64> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:160 Lat:160 SizeLat:160 for: %res32 = mul <32 x i64> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found costs of RThru:192 CodeSize:320 Lat:320 SizeLat:320 for: %res64 = mul <64 x i64> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%xa2 = sext <2 x i32> %a2 to <2 x i64>
%xb2 = zext <2 x i32> %b2 to <2 x i64>
More information about the llvm-commits
mailing list