[llvm] f114ef3 - [CostModel][X86] Add generic costs for vXi32 MUL -> v2Xi16 PMADDDW folds
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 5 08:09:47 PDT 2021
Author: Simon Pilgrim
Date: 2021-09-05T16:08:11+01:00
New Revision: f114ef3731dfd79e8f235cc02ec9879c489ebf96
URL: https://github.com/llvm/llvm-project/commit/f114ef3731dfd79e8f235cc02ec9879c489ebf96
DIFF: https://github.com/llvm/llvm-project/commit/f114ef3731dfd79e8f235cc02ec9879c489ebf96.diff
LOG: [CostModel][X86] Add generic costs for vXi32 MUL -> v2Xi16 PMADDDW folds
Based off the improved fold in D108522
This should eventually allow us to replace the SLM only cost patterns with generic versions.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/mul32.ll
llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 6a68326e6452b..5df4a3a208180 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -206,6 +206,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
+ LT.second.getScalarType() == MVT::i32) {
+ // Check if the operands can be represented as a smaller datatype.
+ bool Op1Signed = false, Op2Signed = false;
+ unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
+ unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
+ unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
+
+ // If both are representable as i15 and at least one is zero-extended,
+ // then we can treat this as PMADDWD which has the same costs
+ // as a vXi16 multiply..
+ if (OpMinSize <= 15 && (!Op1Signed || !Op2Signed) && !ST->isPMADDWDSlow())
+ LT.second =
+ MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
+ }
+
if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
ISD == ISD::UREM) &&
(Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
@@ -288,6 +304,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
if (ST->isSLM()) {
if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
// Check if the operands can be shrinked into a smaller datatype.
+ // TODO: Merge this into generiic vXi32 MUL patterns above.
bool Op1Signed = false;
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
bool Op2Signed = false;
diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll
index 2dfb2b574601b..4246d2a1857b2 100644
--- a/llvm/test/Analysis/CostModel/X86/mul32.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul32.ll
@@ -195,11 +195,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'mul_zext_vXi8'
@@ -213,11 +213,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'mul_zext_vXi8'
@@ -231,11 +231,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'mul_zext_vXi8'
@@ -249,11 +249,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'mul_zext_vXi8'
@@ -267,11 +267,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'mul_zext_vXi8'
@@ -303,11 +303,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; GLM-LABEL: 'mul_zext_vXi8'
@@ -321,11 +321,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32>
; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%xa4 = zext <4 x i8> %a4 to <4 x i32>
@@ -358,11 +358,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'mul_sext_zext_vXi8'
@@ -376,11 +376,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'mul_sext_zext_vXi8'
@@ -394,11 +394,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'mul_sext_zext_vXi8'
@@ -412,11 +412,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'mul_sext_zext_vXi8'
@@ -430,11 +430,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'mul_sext_zext_vXi8'
@@ -466,11 +466,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; GLM-LABEL: 'mul_sext_zext_vXi8'
@@ -484,11 +484,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i
; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32>
; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%xa4 = sext <4 x i8> %a4 to <4 x i32>
diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
index 6d169bdcd41f7..c6fab69729790 100644
--- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
+++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
@@ -76,12 +76,12 @@ entry:
define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) {
; SLM-LABEL: 'slm-costs_8_v4_zext_mul'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; GLM-LABEL: 'slm-costs_8_v4_zext_mul'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
entry:
@@ -93,12 +93,12 @@ entry:
define <4 x i32> @slm-costs_8_v4_zext_mul_fail(<4 x i8> %a) {
; SLM-LABEL: 'slm-costs_8_v4_zext_mul_fail'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; GLM-LABEL: 'slm-costs_8_v4_zext_mul_fail'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
entry:
@@ -110,12 +110,12 @@ entry:
define <4 x i32> @slm-costs_8_v4_zext_mul_fail_2(<4 x i8> %a) {
; SLM-LABEL: 'slm-costs_8_v4_zext_mul_fail_2'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; GLM-LABEL: 'slm-costs_8_v4_zext_mul_fail_2'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
index ad79e38cafa08..2702e0f39dab6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll
@@ -36,13 +36,13 @@ for.body: ; preds = %for.body.preheader,
%mul = mul nsw i32 %conv3, %conv
; sources of the mul is zext\sext from i8
; use pmulhw\pmullw\pshuf
-; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32
%conv4 = zext i8 %1 to i32
%mul2 = mul nsw i32 %conv4, %conv
%sum0 = add i32 %mul, %mul2
; sources of the mul is zext\zext from i8
; use pmullw\zext
-; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32
%conv5 = zext i8 %0 to i32
%mul3 = mul nsw i32 %conv5, %conv4
%sum1 = add i32 %sum0, %mul3
@@ -53,17 +53,17 @@ for.body: ; preds = %for.body.preheader,
%sum2 = add i32 %sum1, %mul4
; sources of the mul is sext\250
; use pmulhw\pmullw\pshuf
-; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32
%mul5 = mul nsw i32 250, %conv3
%sum3 = add i32 %sum2, %mul5
; sources of the mul is zext\-120
; use pmulhw\pmullw\pshuf
-; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32
+; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32
%mul6 = mul nsw i32 -120, %conv4
%sum4 = add i32 %sum3, %mul6
; sources of the mul is zext\250
; use pmullw\zext
-; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32
+; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32
%mul7 = mul nsw i32 250, %conv4
%sum5 = add i32 %sum4, %mul7
%add = add i32 %acc.013, 5
More information about the llvm-commits
mailing list