[llvm] 77dcdc2 - [CostModel][X86] Pre-SSE41 targets can use PMADDWD for sext sub-i16 -> i32

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 14 04:21:08 PDT 2021


Author: Simon Pilgrim
Date: 2021-10-14T12:17:40+01:00
New Revision: 77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542

URL: https://github.com/llvm/llvm-project/commit/77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542
DIFF: https://github.com/llvm/llvm-project/commit/77dcdc2f50cca2e2b9025a1575ba8a5ebbe03542.diff

LOG: [CostModel][X86] Pre-SSE41 targets can use PMADDWD for sext sub-i16 -> i32

Without SSE41 sext/zext instructions, the extensions will be split, meaning that the MUL->PMADDWD fold will split sext_i32(x) into zext_i32(sext_i16(x)).

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/mul32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 519b85658be7b..f12399997180b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -215,20 +215,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
 
     // If both are representable as i15 and at least one is constant,
-    // zero-extended, or sign-extended from vXi16 then we can treat this as
-    // PMADDWD which has the same costs as a vXi16 multiply.
+    // zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
+    // can treat this as PMADDWD which has the same costs as a vXi16 multiply.
     if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
       bool Op1Constant =
           isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
       bool Op2Constant =
           isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
-      bool Op1Sext16 = isa<SExtInst>(Args[0]) && Op1MinSize == 15;
-      bool Op2Sext16 = isa<SExtInst>(Args[1]) && Op2MinSize == 15;
+      bool Op1Sext = isa<SExtInst>(Args[0]) &&
+                     (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
+      bool Op2Sext = isa<SExtInst>(Args[1]) &&
+                     (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
 
       bool IsZeroExtended = !Op1Signed || !Op2Signed;
       bool IsConstant = Op1Constant || Op2Constant;
-      bool IsSext16 = Op1Sext16 || Op2Sext16;
-      if (IsConstant || IsZeroExtended || IsSext16)
+      bool IsSext = Op1Sext || Op2Sext;
+      if (IsConstant || IsZeroExtended || IsSext)
         LT.second =
             MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
     }

diff  --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll
index 1bd70d1e08a4b..59df16e079092 100644
--- a/llvm/test/Analysis/CostModel/X86/mul32.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul32.ll
@@ -32,11 +32,11 @@ define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSSE3-LABEL: 'mul_sext_vXi8'
@@ -50,11 +50,11 @@ define void @mul_sext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %xb32 = sext <32 x i8> %b32 to <32 x i32>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %xb64 = sext <64 x i8> %b64 to <64 x i32>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'mul_sext_vXi8'


        


More information about the llvm-commits mailing list