[llvm] 7ff5770 - [SLP] allow forming 2-way reduction patterns
Eric Christopher via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 15:21:46 PST 2019
Following up to the thread here.
Per our discussion I've gone ahead and reverted this as I'm still
seeing a number of miscompiles even after other changes. I'll work on
getting you a testcase.
echristo at jhereg ~/s/llvm-project> git push
To github.com:llvm/llvm-project.git
24aafcadff3..cd8748a15f2 master -> master
Thanks!
-eric
On Thu, Nov 7, 2019 at 4:11 AM Sanjay Patel via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Sanjay Patel
> Date: 2019-11-07T06:08:42-05:00
> New Revision: 7ff57705ba196ce649d6034614b3b9df57e1f84f
>
> URL: https://github.com/llvm/llvm-project/commit/7ff57705ba196ce649d6034614b3b9df57e1f84f
> DIFF: https://github.com/llvm/llvm-project/commit/7ff57705ba196ce649d6034614b3b9df57e1f84f.diff
>
> LOG: [SLP] allow forming 2-way reduction patterns
>
> We have a vector compare reduction problem seen in PR39665 comment 2:
> https://bugs.llvm.org/show_bug.cgi?id=39665#c2
>
> Or slightly reduced here:
>
> define i1 @cmp2(<2 x double> %a0) {
> %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
> %b = extractelement <2 x i1> %a, i32 0
> %c = extractelement <2 x i1> %a, i32 1
> %d = and i1 %b, %c
> ret i1 %d
> }
>
> SLP would not attempt to turn this into a vector reduction because there is an
> artificial lower limit on that transform. We can not completely remove that limit
> without inducing regressions though, so this patch just hacks an extra attempt at
> creating a 2-way reduction to the end of the analysis.
>
> As shown in the test file, we are still not getting some of the motivating cases,
> so follow-on patches will be needed to solve those cases.
>
> Differential Revision: https://reviews.llvm.org/D59710
>
> Added:
>
>
> Modified:
> llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
> llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> llvm/test/Feature/weak_constant.ll
> llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
> index 32ccc8a46380..d06de6cd4e3d 100644
> --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
> +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
> @@ -113,9 +113,12 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
>
> /// Try to find horizontal reduction or otherwise vectorize a chain of binary
> /// operators.
> + /// \p Try2WayRdx specializes the analysis to only attempt a 2-element
> + /// reduction.
> bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,
> slpvectorizer::BoUpSLP &R,
> - TargetTransformInfo *TTI);
> + TargetTransformInfo *TTI,
> + bool Try2WayRdx = false);
>
> /// Try to vectorize trees that start at insertvalue instructions.
> bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
>
> diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> index c0c1a451e951..33e388a21c54 100644
> --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -6397,7 +6397,7 @@ class HorizontalReduction {
>
> /// Attempt to vectorize the tree found by
> /// matchAssociativeReduction.
> - bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
> + bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI, bool Try2WayRdx) {
> if (ReducedVals.empty())
> return false;
>
> @@ -6405,11 +6405,14 @@ class HorizontalReduction {
> // to a nearby power-of-2. Can safely generate oversized
> // vectors and rely on the backend to split them to legal sizes.
> unsigned NumReducedVals = ReducedVals.size();
> - if (NumReducedVals < 4)
> + if (Try2WayRdx && NumReducedVals != 2)
> + return false;
> + unsigned MinRdxVals = Try2WayRdx ? 2 : 4;
> + if (NumReducedVals < MinRdxVals)
> return false;
>
> unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
> -
> + unsigned MinRdxWidth = Log2_32(MinRdxVals);
> Value *VectorizedTree = nullptr;
>
> // FIXME: Fast-math-flags should be set based on the instructions in the
> @@ -6433,7 +6436,7 @@ class HorizontalReduction {
> SmallVector<Value *, 16> IgnoreList;
> for (auto &V : ReductionOps)
> IgnoreList.append(V.begin(), V.end());
> - while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
> + while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > MinRdxWidth) {
> auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
> V.buildTree(VL, ExternallyUsedValues, IgnoreList);
> Optional<ArrayRef<unsigned>> Order = V.bestOrder();
> @@ -6759,7 +6762,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
> /// performed.
> static bool tryToVectorizeHorReductionOrInstOperands(
> PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
> - TargetTransformInfo *TTI,
> + TargetTransformInfo *TTI, bool Try2WayRdx,
> const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
> if (!ShouldVectorizeHor)
> return false;
> @@ -6790,7 +6793,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
> if (BI || SI) {
> HorizontalReduction HorRdx;
> if (HorRdx.matchAssociativeReduction(P, Inst)) {
> - if (HorRdx.tryToReduce(R, TTI)) {
> + if (HorRdx.tryToReduce(R, TTI, Try2WayRdx)) {
> Res = true;
> // Set P to nullptr to avoid re-analysis of phi node in
> // matchAssociativeReduction function unless this is the root node.
> @@ -6833,7 +6836,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
>
> bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
> BasicBlock *BB, BoUpSLP &R,
> - TargetTransformInfo *TTI) {
> + TargetTransformInfo *TTI,
> + bool Try2WayRdx) {
> if (!V)
> return false;
> auto *I = dyn_cast<Instruction>(V);
> @@ -6846,7 +6850,7 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
> auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
> return tryToVectorize(I, R);
> };
> - return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
> + return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, Try2WayRdx,
> ExtraVectorization);
> }
>
> @@ -7042,6 +7046,23 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
> PostProcessInstructions.push_back(&*it);
> }
>
> + // Make a final attempt to match a 2-way reduction if nothing else worked.
> + // We do not try this above because it may interfere with other vectorization
> + // attempts.
> + // TODO: The constraints are copied from the above call to
> + // vectorizeRootInstruction(), but that might be too restrictive?
> + BasicBlock::iterator LastInst = --BB->end();
> + if (!Changed && LastInst->use_empty() &&
> + (LastInst->getType()->isVoidTy() || isa<CallInst>(LastInst) ||
> + isa<InvokeInst>(LastInst))) {
> + if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(LastInst)) {
> + for (auto *V : LastInst->operand_values()) {
> + Changed |= vectorizeRootInstruction(nullptr, V, BB, R, TTI,
> + /* Try2WayRdx */ true);
> + }
> + }
> + }
> +
> return Changed;
> }
>
>
> diff --git a/llvm/test/Feature/weak_constant.ll b/llvm/test/Feature/weak_constant.ll
> index 4ac2e7e7d689..9a2ea126ebc2 100644
> --- a/llvm/test/Feature/weak_constant.ll
> +++ b/llvm/test/Feature/weak_constant.ll
> @@ -1,5 +1,5 @@
> ; RUN: opt < %s -O3 -S > %t
> -; RUN: grep undef %t | count 1
> +; RUN: grep undef %t | count 2
> ; RUN: grep 5 %t | count 1
> ; RUN: grep 7 %t | count 1
> ; RUN: grep 9 %t | count 1
>
> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
> index b5f433549274..fef9a8e50cdf 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
> @@ -54,10 +54,10 @@ define double @foo(double* nocapture %D) {
> define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
> ; CHECK-LABEL: @two_wide_fcmp_reduction(
> ; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], <double 1.000000e+00, double 1.000000e+00>
> -; CHECK-NEXT: [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0
> -; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1
> -; CHECK-NEXT: [[D:%.*]] = and i1 [[B]], [[C]]
> -; CHECK-NEXT: ret i1 [[D]]
> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i1> [[A]], [[RDX_SHUF]]
> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
> +; CHECK-NEXT: ret i1 [[TMP1]]
> ;
> %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
> %b = extractelement <2 x i1> %a, i32 0
> @@ -96,12 +96,11 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1
> ; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
> -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
> -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D
> -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
> -; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
> +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i1> [[TMP8]], [[RDX_SHUF]]
> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
> +; CHECK-NEXT: br i1 [[TMP9]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
> ; CHECK: lor.lhs.false:
> ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list