[llvm] r325134 - [SLP] Allow vectorization of reversed loads.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 14 07:29:15 PST 2018


Author: abataev
Date: Wed Feb 14 07:29:15 2018
New Revision: 325134

URL: http://llvm.org/viewvc/llvm-project?rev=325134&view=rev
Log:
[SLP] Allow vectorization of reversed loads.

Summary:
Reversed loads are currently handled as gathers, but we can instead emit a
single consecutive vector load and reshuffle its lanes. This patch adds
support for vectorization of reversed loads.
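
For illustration, the core of the change is to keep the consecutive vector
load and then reverse the lanes with a shuffle. Below is a minimal
standalone C++ sketch (plain STL containers rather than LLVM's ADTs, with a
hand-rolled "shuffle") of the reverse-mask construction used by the
vectorizeTree change further down:

  #include <cstdint>
  #include <iostream>
  #include <numeric>
  #include <vector>

  int main() {
    // Pretend these four lanes came from a single consecutive vector load.
    std::vector<int64_t> Loaded = {10, 20, 30, 40};

    // std::iota over reverse iterators fills the mask with {3, 2, 1, 0},
    // i.e. a lane-reversing shuffle mask.
    std::vector<uint32_t> Mask(Loaded.size());
    std::iota(Mask.rbegin(), Mask.rend(), 0);

    // Apply the "shufflevector" by hand: pick lanes according to the mask.
    for (uint32_t Idx : Mask)
      std::cout << Loaded[Idx] << ' ';  // prints: 40 30 20 10
    std::cout << '\n';
    return 0;
  }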

Reviewers: RKSimon, spatel, mkuper, hfinkel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43022

Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=325134&r1=325133&r2=325134&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Feb 14 07:29:15 2018
@@ -1629,15 +1629,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
             break;
           }
 
-      BS.cancelScheduling(VL, VL0);
-      newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
-
       if (ReverseConsecutive) {
         --NumOpsWantToKeepOrder[S.Opcode];
-        DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
-      } else {
-        DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+        newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+        DEBUG(dbgs() << "SLP: added a vector of reversed loads.\n");
+        return;
       }
+
+      DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+      BS.cancelScheduling(VL, VL0);
+      newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
       return;
     }
     case Instruction::ZExt:
@@ -2245,6 +2246,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
           TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
       int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
                                            VecTy, alignment, 0, VL0);
+      if (!isConsecutiveAccess(VL[0], VL[1], *DL, *SE)) {
+        VecLdCost += TTI->getShuffleCost(
+            TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+      }
       return ReuseShuffleCost + VecLdCost - ScalarLdCost;
     }
     case Instruction::Store: {
@@ -3199,6 +3204,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
     case Instruction::Load: {
       // Loads are inserted at the head of the tree because we don't want to
       // sink them all the way down past store instructions.
+      bool IsReversed =
+          !isConsecutiveAccess(E->Scalars[0], E->Scalars[1], *DL, *SE);
+      if (IsReversed)
+        VL0 = cast<Instruction>(E->Scalars.back());
       setInsertPointAfterBundle(E->Scalars, VL0);
 
       LoadInst *LI = cast<LoadInst>(VL0);
@@ -3222,6 +3231,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
       }
       LI->setAlignment(Alignment);
       Value *V = propagateMetadata(LI, E->Scalars);
+      if (IsReversed) {
+        SmallVector<uint32_t, 4> Mask(E->Scalars.size());
+        std::iota(Mask.rbegin(), Mask.rend(), 0);
+        V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask);
+      }
       if (NeedToShuffleReuses) {
         V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
                                         E->ReuseShuffleIndices, "shuffle");

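For reference, the second function in the updated test below corresponds
roughly to source like the following hypothetical C++ reconstruction (not
taken from the commit; the noalias arguments in the IR model
restrict-qualified pointers). The loads are consecutive, but the stores
consume them in reversed order, so SLP previously fell back to gathering;
with this patch it emits one <2 x i64> load, a reversing shuffle, a widening
shuffle for the repeated lanes, and a single <4 x i64> store:

  #include <cstdint>

  // Hypothetical reconstruction of the i64_simplifiedi_reversed test case.
  void i64_simplifiedi_reversed(int64_t *st, int64_t *ld) {
    int64_t t0 = ld[0];
    int64_t t1 = ld[1];
    // The stored values are the loaded ones in reversed (and repeated) order.
    st[0] = t1;
    st[1] = t0;
    st[2] = t1;
    st[3] = t0;
  }
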
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll?rev=325134&r1=325133&r2=325134&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR32086.ll Wed Feb 14 07:29:15 2018
@@ -33,15 +33,15 @@ define void @i64_simplified(i64* noalias
 define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
 ; CHECK-LABEL: @i64_simplifiedi_reversed(
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
-; CHECK-NEXT:    [[T0:%.*]] = load i64, i64* [[LD]], align 8
-; CHECK-NEXT:    [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
-; CHECK-NEXT:    store i64 [[T1]], i64* [[ST]], align 8
-; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    store i64 [[T1]], i64* [[ARRAYIDX4]], align 8
-; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
+; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP4]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
