[llvm] 77dcdc2 - [CostModel][X86] Pre-SSE41 targets can use PMADDWD for sext sub-i16 -> i32
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 14 04:21:08 PDT 2021
Author: Simon Pilgrim
Date: 2021-10-14T12:17:40+01:00
New Revision: 77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542
URL: https://github.com/llvm/llvm-project/commit/77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542
DIFF: https://github.com/llvm/llvm-project/commit/77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542.diff
LOG: [CostModel][X86] Pre-SSE41 targets can use PMADDWD for sext sub-i16 -> i32
Without SSE41 sext/zext instructions, the extensions will be split, meaning that the MUL->PMADDWD fold will split the sext_i32(x) into zext_i32(sext_i16(x)).
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/mul32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 519b85658be7b..f12399997180b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -215,20 +215,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
// If both are representable as i15 and at least one is constant,
- // zero-extended, or sign-extended from vXi16 then we can treat this as
- // PMADDWD which has the same costs as a vXi16 multiply.
+ // zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
+ // can treat this as PMADDWD which has the same costs as a vXi16 multiply.
if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
bool Op1Constant =
isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
bool Op2Constant =
isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
- bool Op1Sext16 = isa<SExtInst>(Args[0]) && Op1MinSize == 15;
- bool Op2Sext16 = isa<SExtInst>(Args[1]) && Op2MinSize == 15;
+ bool Op1Sext = isa<SExtInst>(Args[0]) &&
+ (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
+ bool Op2Sext = isa<SExtInst>(Args[1]) &&
+ (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
bool IsZeroExtended = !Op1Signed || !Op2Signed;
bool IsConstant = Op1Constant || Op2Constant;
- bool IsSext16 = Op1Sext16 || Op2Sext16;
- if (IsConstant || IsZeroExtended || IsSext16)
+ bool IsSext = Op1Sext || Op2Sext;
+ if (IsConstant || IsZeroExtended || IsSext)
LT.second =
MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
}
diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll
index 1bd70d1e08a4b..59df16e079092 100644
--- a/llvm/test/Analysis/CostModel/X86/mul32.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul32.ll
@@ -32,11 +32,11 @@ define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'mul_sext_vXi8'
@@ -50,11 +50,11 @@ define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'mul_sext_vXi8'
More information about the llvm-commits mailing list