[PATCH] D43022: [SLP] Allow vectorization of reversed loads.
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 12 08:43:56 PST 2018
ABataev updated this revision to Diff 133869.
ABataev added a comment.
Fixed final vectorization of reversed loads by starting from the last load instruction, not the first one.
Repository:
rL LLVM
https://reviews.llvm.org/D43022
Files:
lib/Transforms/Vectorize/SLPVectorizer.cpp
test/Transforms/SLPVectorizer/X86/PR32086.ll
Index: test/Transforms/SLPVectorizer/X86/PR32086.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/PR32086.ll
+++ test/Transforms/SLPVectorizer/X86/PR32086.ll
@@ -33,15 +33,15 @@
define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_reversed(
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
-; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
-; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
-; CHECK-NEXT: store i64 [[T1]], i64* [[ST]], align 8
-; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
-; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX4]], align 8
-; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX5]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[ST]] to <4 x i64>*
+; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1629,15 +1629,16 @@
break;
}
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
-
if (ReverseConsecutive) {
--NumOpsWantToKeepOrder[S.Opcode];
- DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
- } else {
- DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+ newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ DEBUG(dbgs() << "SLP: added a vector of reversed loads.\n");
+ return;
}
+
+ DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
}
case Instruction::ZExt:
@@ -2245,6 +2246,10 @@
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
VecTy, alignment, 0, VL0);
+ if (!isConsecutiveAccess(VL[0], VL[1], *DL, *SE)) {
+ VecLdCost += TTI->getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
return ReuseShuffleCost + VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
@@ -3199,6 +3204,10 @@
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
+ bool IsReversed =
+ !isConsecutiveAccess(E->Scalars[0], E->Scalars[1], *DL, *SE);
+ if (IsReversed)
+ VL0 = cast<Instruction>(E->Scalars.back());
setInsertPointAfterBundle(E->Scalars, VL0);
LoadInst *LI = cast<LoadInst>(VL0);
@@ -3222,6 +3231,11 @@
}
LI->setAlignment(Alignment);
Value *V = propagateMetadata(LI, E->Scalars);
+ if (IsReversed) {
+ SmallVector<uint32_t, 4> Mask(E->Scalars.size());
+ std::iota(Mask.rbegin(), Mask.rend(), 0);
+ V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask);
+ }
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D43022.133869.patch
Type: text/x-patch
Size: 4188 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180212/0848f51a/attachment.bin>
More information about the llvm-commits
mailing list