[llvm] 97e04d4 - [X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): canonicalize to integer type

Tue May 11 11:36:16 PDT 2021

Author: Roman Lebedev
Date: 2021-05-11T21:35:58+03:00
New Revision: 97e04d41e646aa13b0cc5ff3812bfb7305fa4756

URL: https://github.com/llvm/llvm-project/commit/97e04d41e646aa13b0cc5ff3812bfb7305fa4756
DIFF: https://github.com/llvm/llvm-project/commit/97e04d41e646aa13b0cc5ff3812bfb7305fa4756.diff

LOG: [X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): canonicalize to integer type

This way we don't have to duplicate i32/f32 and i64/f64 entries,
which was already forgotten to be done for a few tuples.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 46a973b86af0..66a7b10617aa 100644

--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4686,6 +4686,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
 
   unsigned VF = VecTy->getNumElements() / Factor;
   Type *ScalarTy = VecTy->getElementType();
+  // Deduplicate entries, model floats/pointers as appropriately-sized integers.
+  if (!ScalarTy->isIntegerTy())
+    ScalarTy =
+        Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
 
   // Get the cost of all the memory operations.
   InstructionCost MemOpCosts = getMemoryOpCost(
@@ -4699,22 +4703,22 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
                                              CostKind);
 
   // TODO: Complete for other data-types and strides.
-  // Each combination of Stride, ElementTy and VF results in a 
diff erent
+  // Each combination of Stride, element bit width and VF results in a 
diff erent
   // sequence; The cost tables are therefore accessed with:
-  // Factor (stride) and VectorType=VFxElemType.
+  // Factor (stride) and VectorType=VFxiN.
   // The Cost accounts only for the shuffle sequence;
   // The cost of the loads/stores is accounted for separately.
   //
   static const CostTblEntry AVX2InterleavedLoadTbl[] = {
     { 2, MVT::v4i64, 6 }, //(load 8i64 and) deinterleave into 2 x 4i64
-    { 2, MVT::v4f64, 6 }, //(load 8f64 and) deinterleave into 2 x 4f64
 
     { 3, MVT::v2i8,  10 }, //(load 6i8 and)  deinterleave into 3 x 2i8
     { 3, MVT::v4i8,  4 },  //(load 12i8 and) deinterleave into 3 x 4i8
     { 3, MVT::v8i8,  9 },  //(load 24i8 and) deinterleave into 3 x 8i8
     { 3, MVT::v16i8, 11},  //(load 48i8 and) deinterleave into 3 x 16i8
     { 3, MVT::v32i8, 13},  //(load 96i8 and) deinterleave into 3 x 32i8
-    { 3, MVT::v8f32, 17 }, //(load 24f32 and)deinterleave into 3 x 8f32
+
+    { 3, MVT::v8i32, 17 }, //(load 24i32 and)deinterleave into 3 x 8i32
 
     { 4, MVT::v2i8,  12 }, //(load 8i8 and)   deinterleave into 4 x 2i8
     { 4, MVT::v4i8,  4 },  //(load 16i8 and)  deinterleave into 4 x 4i8
@@ -4722,12 +4726,11 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
     { 4, MVT::v16i8, 39 }, //(load 64i8 and)  deinterleave into 4 x 16i8
     { 4, MVT::v32i8, 80 }, //(load 128i8 and) deinterleave into 4 x 32i8
 
-    { 8, MVT::v8f32, 40 }  //(load 64f32 and)deinterleave into 8 x 8f32
+    { 8, MVT::v8i32, 40 }  //(load 64i32 and)deinterleave into 8 x 8i32
   };
 
   static const CostTblEntry AVX2InterleavedStoreTbl[] = {
     { 2, MVT::v4i64, 6 }, //interleave into 2 x 4i64 into 8i64 (and store)
-    { 2, MVT::v4f64, 6 }, //interleave into 2 x 4f64 into 8f64 (and store)
 
     { 3, MVT::v2i8,  7 },  //interleave 3 x 2i8  into 6i8 (and store)
     { 3, MVT::v4i8,  8 },  //interleave 3 x 4i8  into 12i8 (and store)