[llvm-branch-commits] [llvm-branch] r351440 - Merging r351349:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 17 05:26:58 PST 2019
Author: hans
Date: Thu Jan 17 05:26:58 2019
New Revision: 351440
URL: http://llvm.org/viewvc/llvm-project?rev=351440&view=rev
Log:
Merging r351349:
------------------------------------------------------------------------
r351349 | abataev | 2019-01-16 16:39:52 +0100 (Wed, 16 Jan 2019) | 14 lines
[SLP] Fix PR40310: The reduction nodes should stay scalar.
Summary:
Sometimes the SLP vectorizer tries to vectorize the horizontal reduction
nodes during regular vectorization. This may happen inside of the loops,
when there are some vectorizable PHIs. Patch fixes this by checking if
the node is the reduction node and thus it must not be vectorized, it must
be gathered.
Reviewers: RKSimon, spatel, hfinkel, fedor.sergeev
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D56783
------------------------------------------------------------------------
Modified:
llvm/branches/release_80/ (props changed)
llvm/branches/release_80/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR39774.ll
llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR40310.ll
Propchange: llvm/branches/release_80/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 17 05:26:58 2019
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,351436
+/llvm/trunk:155241,351349,351436
Modified: llvm/branches/release_80/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=351440&r1=351439&r2=351440&view=diff
==============================================================================
--- llvm/branches/release_80/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/branches/release_80/lib/Transforms/Vectorize/SLPVectorizer.cpp Thu Jan 17 05:26:58 2019
@@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
+ // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i])) {
+ if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;
Modified: llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR39774.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR39774.ll?rev=351440&r1=351439&r2=351440&view=diff
==============================================================================
--- llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR39774.ll (original)
+++ llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR39774.ll Thu Jan 17 05:26:58 2019
@@ -3,93 +3,185 @@
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION
define void @Test(i32) {
-; ALL-LABEL: @Test(
-; ALL-NEXT: entry:
-; ALL-NEXT: br label [[LOOP:%.*]]
-; ALL: loop:
-; ALL-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; ALL-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; ALL-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
-; ALL-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
-; ALL-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
-; ALL-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
-; ALL-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
-; ALL-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
-; ALL-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
-; ALL-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
-; ALL-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
-; ALL-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
-; ALL-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
-; ALL-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
-; ALL-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
-; ALL-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
-; ALL-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
-; ALL-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
-; ALL-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
-; ALL-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
-; ALL-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
-; ALL-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
-; ALL-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
-; ALL-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
-; ALL-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
-; ALL-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
-; ALL-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
-; ALL-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
-; ALL-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
-; ALL-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
-; ALL-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
-; ALL-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
-; ALL-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
-; ALL-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
-; ALL-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
-; ALL-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
-; ALL-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
-; ALL-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
-; ALL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
-; ALL-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1
-; ALL-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
-; ALL-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
-; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
-; ALL-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]]
-; ALL-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; ALL-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
-; ALL-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; ALL-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; ALL-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
-; ALL-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; ALL-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
-; ALL-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]]
-; ALL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
-; ALL-NEXT: br label [[LOOP]]
+; CHECK-LABEL: @Test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
+; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
+; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
+; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
+; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
+; CHECK-NEXT: br label [[LOOP]]
+;
+; FORCE_REDUCTION-LABEL: @Test(
+; FORCE_REDUCTION-NEXT: entry:
+; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]]
+; FORCE_REDUCTION: loop:
+; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
+; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> <i32 0, i32 55, i32 285, i32 1240>, [[SHUFFLE]]
+; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
+; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
+; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
+; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
+; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; FORCE_REDUCTION-NEXT: [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
+; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
+; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
+; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
+; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
+; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
+; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0
+; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
+; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]
+; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT: br label [[LOOP]]
;
entry:
br label %loop
Modified: llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR40310.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR40310.ll?rev=351440&r1=351439&r2=351440&view=diff
==============================================================================
--- llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR40310.ll (original)
+++ llvm/branches/release_80/test/Transforms/SLPVectorizer/X86/PR40310.ll Thu Jan 17 05:26:58 2019
@@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 *
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 undef>, i32 [[PARAM:%.*]], i32 1
; CHECK-NEXT: br label [[BCI_15:%.*]]
; CHECK: bci_15:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
@@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 *
; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]]
; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]]
; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> <i32 16, i32 undef>, i32 [[V42]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 *
; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
-; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]]
+; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
+; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: ret void
More information about the llvm-branch-commits
mailing list