[llvm] r272391 - Revert "[TTI] Refine default cost for interleaved load groups with gaps"

Fri Jun 10 05:41:34 PDT 2016

Author: mssimpso
Date: Fri Jun 10 07:41:33 2016
New Revision: 272391

URL: http://llvm.org/viewvc/llvm-project?rev=272391&view=rev
Log:
Revert "[TTI] Refine default cost for interleaved load groups with gaps"

This reverts commit r272385, which broke the build. I'm temporarily
reverting it to investigate.

Modified:
    llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll

Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=272391&r1=272390&r2=272391&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Fri Jun 10 07:41:33 2016
@@ -542,50 +542,6 @@ public:
     unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
         Opcode, VecTy, Alignment, AddressSpace);
 
-    // Legalize the vector type, and get the legalized and unlegalized type
-    // sizes.
-    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
-    unsigned VecTySize = DL.getTypeStoreSize(VecTy);
-    unsigned VecTyLTSize = VecTyLT.getStoreSize();
-
-    // Return the ceiling of dividing A by B.
-    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
-
-    // Scale the cost of the memory operation by the fraction of legalized
-    // instructions that will actually be used. We shouldn't account for the
-    // cost of dead instructions since they will be removed.
-    //
-    // E.g., An interleaved load of factor 8:
-    //       %vec = load <16 x i64>, <16 x i64>* %ptr
-    //       %v0 = shufflevector %vec, undef, <0, 8>
-    //
-    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
-    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
-    // type). The other loads are unused.
-    //
-    // We only scale the cost of loads since interleaved store groups aren't
-    // allowed to have gaps.
-    if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
-
-      // The number of loads of a legal type it will take to represent a load
-      // of the unlegalized vector type.
-      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
-
-      // The number of elements of the unlegalized type that correspond to a
-      // single legal instruction.
-      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
-
-      // Determine which legal instructions will be used.
-      BitVector UsedInsts(NumLegalInsts, false);
-      for (unsigned Index : Indices)
-        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
-          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
-
-      // Scale the cost of the load by the fraction of legal instructions that
-      // will be used.
-      Cost *= UsedInsts.count() / NumLegalInsts;
-    }
-
     // Then plus the cost of interleave operation.
     if (Opcode == Instruction::Load) {
       // The interleave cost is similar to extract sub vectors' elements

Modified: llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll?rev=272391&r1=272390&r2=272391&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll Fri Jun 10 07:41:33 2016
@@ -14,7 +14,6 @@ entry:
 ; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
 ; access group is 2.
 
-; CHECK: LV: Checking a loop in "test_byte_interleaved_cost"
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
 ; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %tmp = load i8, i8* %arrayidx0, align 4
 
@@ -38,44 +37,3 @@ for.body:
 for.end:                                          ; preds = %for.body
   ret void
 }
-
-%ig.factor.8 = type { double*, double, double, double, double, double, double, double }
-define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) {
-entry:
-  br label %for.body
-
-; Check the default cost of a strided load with a factor that is greater than
-; the maximum allowed. In this test, the interleave factor would be 8, which is
-; not supported.
-
-; CHECK: LV: Checking a loop in "wide_interleaved_group"
-; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction:   %1 = load double, double* %0, align 8
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %5 = load double, double* %4, align 8
-; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction:   store double %9, double* %10, align 8
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ]
-  %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2
-  %1 = load double, double* %0, align 8
-  %2 = fcmp fast olt double %1, %a
-  %3 = select i1 %2, double 0.000000e+00, double %1
-  %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6
-  %5 = load double, double* %4, align 8
-  %6 = fcmp fast olt double %5, %a
-  %7 = select i1 %6, double 0.000000e+00, double %5
-  %8 = fmul fast double %7, %b
-  %9 = fadd fast double %8, %3
-  %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3
-  store double %9, double* %10, align 8
-  %11 = fmul fast double %9, %9
-  %12 = fadd fast double %11, %r
-  %i.next = add nuw nsw i64 %i, 1
-  %13 = trunc i64 %i.next to i32
-  %cond = icmp eq i32 %13, %n
-  br i1 %cond, label %for.exit, label %for.body
-
-for.exit:
-  %r.lcssa = phi double [ %12, %for.body ]
-  ret double %r.lcssa
-}