[llvm] r261070 - Detect vector reduction operations just before instruction selection.
David Majnemer via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 17 10:25:41 PST 2016
Looks like this caused https://llvm.org/bugs/show_bug.cgi?id=26652
On Tue, Feb 16, 2016 at 10:37 PM, Cong Hou via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: conghou
> Date: Wed Feb 17 00:37:04 2016
> New Revision: 261070
>
> URL: http://llvm.org/viewvc/llvm-project?rev=261070&view=rev
> Log:
> Detect vector reduction operations just before instruction selection.
>
> This patch detects vector reductions before instruction selection. Vector
> reductions are vectorized reduction operations, and for such operations we
> have
> freedom to reorganize the elements of the result as long as the reduction
> of them
> stays unchanged. This will enable some reduction pattern recognition during
> instruction combine such as SAD/dot-product on X86. A flag is added to
> SDNodeFlags to mark those vector reduction nodes to be checked during
> instruction
> combine.
>
> To detect those vector reductions, we search def-use chains starting from
> the
> given instruction, and check if all uses fall into two categories:
>
> 1. Reduction with another vector.
> 2. Reduction on all elements.
>
> in which 2 is detected by recognizing the pattern that the loop vectorizer
> generates to reduce all elements in the vector outside of the loop, which
> includes several ShuffleVector and one ExtractElement instructions.
>
>
> Differential revision: http://reviews.llvm.org/D15250
>
>
> Added:
> llvm/trunk/test/CodeGen/Generic/vector-redux.ll
> Modified:
> llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>
> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=261070&r1=261069&r2=261070&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Wed Feb 17
> 00:37:04 2016
> @@ -328,6 +328,7 @@ private:
> bool NoInfs : 1;
> bool NoSignedZeros : 1;
> bool AllowReciprocal : 1;
> + bool VectorReduction : 1;
>
> public:
> /// Default constructor turns off all optimization flags.
> @@ -340,6 +341,7 @@ public:
> NoInfs = false;
> NoSignedZeros = false;
> AllowReciprocal = false;
> + VectorReduction = false;
> }
>
> // These are mutators for each flag.
> @@ -351,6 +353,7 @@ public:
> void setNoInfs(bool b) { NoInfs = b; }
> void setNoSignedZeros(bool b) { NoSignedZeros = b; }
> void setAllowReciprocal(bool b) { AllowReciprocal = b; }
> + void setVectorReduction(bool b) { VectorReduction = b; }
>
> // These are accessors for each flag.
> bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
> @@ -361,6 +364,7 @@ public:
> bool hasNoInfs() const { return NoInfs; }
> bool hasNoSignedZeros() const { return NoSignedZeros; }
> bool hasAllowReciprocal() const { return AllowReciprocal; }
> + bool hasVectorReduction() const { return VectorReduction; }
>
> /// Return a raw encoding of the flags.
> /// This function should only be used to add data to the NodeID value.
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=261070&r1=261069&r2=261070&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Feb 17
> 00:37:04 2016
> @@ -2308,6 +2308,125 @@ void SelectionDAGBuilder::visitFSub(cons
> visitBinary(I, ISD::FSUB);
> }
>
> +/// Checks if the given instruction performs a vector reduction, in which
> case
> +/// we have the freedom to alter the elements in the result as long as the
> +/// reduction of them stays unchanged.
> +static bool isVectorReductionOp(const User *I) {
> + const Instruction *Inst = dyn_cast<Instruction>(I);
> + if (!Inst || !Inst->getType()->isVectorTy())
> + return false;
> +
> + auto OpCode = Inst->getOpcode();
> + switch (OpCode) {
> + case Instruction::Add:
> + case Instruction::Mul:
> + case Instruction::And:
> + case Instruction::Or:
> + case Instruction::Xor:
> + break;
> + case Instruction::FAdd:
> + case Instruction::FMul:
> + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
> + if (FPOp->getFastMathFlags().unsafeAlgebra())
> + break;
> + // Fall through.
> + default:
> + return false;
> + }
> +
> + unsigned ElemNum = Inst->getType()->getVectorNumElements();
> + unsigned ElemNumToReduce = ElemNum;
> +
> + // Do DFS search on the def-use chain from the given instruction. We
> only
> + // allow four kinds of operations during the search until we reach the
> + // instruction that extracts the first element from the vector:
> + //
> + // 1. The reduction operation of the same opcode as the given
> instruction.
> + //
> + // 2. PHI node.
> + //
> + // 3. ShuffleVector instruction together with a reduction operation
> that
> + // does a partial reduction.
> + //
> + // 4. ExtractElement that extracts the first element from the vector,
> and we
> + // stop searching the def-use chain here.
> + //
> + // 3 & 4 above perform a reduction on all elements of the vector. We
> push defs
> + // from 1-3 to the stack to continue the DFS. The given instruction is
> not
> + // a reduction operation if we meet any other instructions other than
> those
> + // listed above.
> +
> + SmallVector<const User *, 16> UsersToVisit{Inst};
> + SmallPtrSet<const User *, 16> Visited;
> + bool ReduxExtracted = false;
> +
> + while (!UsersToVisit.empty()) {
> + auto User = UsersToVisit.back();
> + UsersToVisit.pop_back();
> + if (!Visited.insert(User).second)
> + continue;
> +
> + for (const auto &U : User->users()) {
> + auto Inst = dyn_cast<Instruction>(U);
> + if (!Inst)
> + return false;
> +
> + if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
> + if (const FPMathOperator *FPOp = dyn_cast<const
> FPMathOperator>(Inst))
> + if (!isa<PHINode>(FPOp) &&
> !FPOp->getFastMathFlags().unsafeAlgebra())
> + return false;
> + UsersToVisit.push_back(U);
> + } else if (const ShuffleVectorInst *ShufInst =
> + dyn_cast<ShuffleVectorInst>(U)) {
> + // Detect the following pattern: A ShuffleVector instruction
> together
> + // with a reduction that does a partial reduction on the first and
> second
> + // ElemNumToReduce / 2 elements, and store the result in
> + // ElemNumToReduce / 2 elements in another vector.
> +
> + if (ElemNumToReduce == 1)
> + return false;
> + if (!isa<UndefValue>(U->getOperand(1)))
> + return false;
> + for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
> + if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
> + return false;
> + for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
> + if (ShufInst->getMaskValue(i) != -1)
> + return false;
> +
> + // There is only one user of this ShuffleVector instruction,
> which must
> + // be a reduction operation.
> + if (!U->hasOneUse())
> + return false;
> +
> + auto U2 = dyn_cast<Instruction>(*U->user_begin());
> + if (!U2 || U2->getOpcode() != OpCode)
> + return false;
> +
> + // Check operands of the reduction operation.
> + if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1)
> == U) ||
> + (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0)
> == U)) {
> + UsersToVisit.push_back(U2);
> + ElemNumToReduce /= 2;
> + } else
> + return false;
> + } else if (isa<ExtractElementInst>(U)) {
> + // At this moment we should have reduced all elements in the
> vector.
> + if (ElemNumToReduce != 1)
> + return false;
> +
> + const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
> + if (!Val || Val->getZExtValue() != 0)
> + return false;
> +
> + ReduxExtracted = true;
> + } else
> + return false;
> + }
> + }
> + return ReduxExtracted;
> +}
> +
> void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
> SDValue Op1 = getValue(I.getOperand(0));
> SDValue Op2 = getValue(I.getOperand(1));
> @@ -2315,6 +2434,7 @@ void SelectionDAGBuilder::visitBinary(co
> bool nuw = false;
> bool nsw = false;
> bool exact = false;
> + bool vec_redux = false;
> FastMathFlags FMF;
>
> if (const OverflowingBinaryOperator *OFBinOp =
> @@ -2328,10 +2448,16 @@ void SelectionDAGBuilder::visitBinary(co
> if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
> FMF = FPOp->getFastMathFlags();
>
> + if (isVectorReductionOp(&I)) {
> + vec_redux = true;
> + DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
> + }
> +
> SDNodeFlags Flags;
> Flags.setExact(exact);
> Flags.setNoSignedWrap(nsw);
> Flags.setNoUnsignedWrap(nuw);
> + Flags.setVectorReduction(vec_redux);
> if (EnableFMFInDAG) {
> Flags.setAllowReciprocal(FMF.allowReciprocal());
> Flags.setNoInfs(FMF.noInfs());
>
> Added: llvm/trunk/test/CodeGen/Generic/vector-redux.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/vector-redux.ll?rev=261070&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Generic/vector-redux.ll (added)
> +++ llvm/trunk/test/CodeGen/Generic/vector-redux.ll Wed Feb 17 00:37:04
> 2016
> @@ -0,0 +1,85 @@
> +; RUN: llc < %s -debug-only=isel -o /dev/null 2>&1 | FileCheck %s
> +; REQUIRES: asserts
> +
> +@a = global [1024 x i32] zeroinitializer, align 16
> +
> +define float @reduce_add_float(float* nocapture readonly %a) {
> +; CHECK-LABEL: reduce_add_float
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +; CHECK: Detected a reduction operation: {{.*}} fadd fast
> +;
> +entry:
> + br label %vector.body
> +
> +vector.body:
> + %index = phi i64 [ 0, %entry ], [ %index.next.4, %vector.body ]
> + %vec.phi = phi <4 x float> [ zeroinitializer, %entry ], [ %28,
> %vector.body ]
> + %vec.phi9 = phi <4 x float> [ zeroinitializer, %entry ], [ %29,
> %vector.body ]
> + %0 = getelementptr inbounds float, float* %a, i64 %index
> + %1 = bitcast float* %0 to <4 x float>*
> + %wide.load = load <4 x float>, <4 x float>* %1, align 4
> + %2 = getelementptr float, float* %0, i64 4
> + %3 = bitcast float* %2 to <4 x float>*
> + %wide.load10 = load <4 x float>, <4 x float>* %3, align 4
> + %4 = fadd fast <4 x float> %wide.load, %vec.phi
> + %5 = fadd fast <4 x float> %wide.load10, %vec.phi9
> + %index.next = add nuw nsw i64 %index, 8
> + %6 = getelementptr inbounds float, float* %a, i64 %index.next
> + %7 = bitcast float* %6 to <4 x float>*
> + %wide.load.1 = load <4 x float>, <4 x float>* %7, align 4
> + %8 = getelementptr float, float* %6, i64 4
> + %9 = bitcast float* %8 to <4 x float>*
> + %wide.load10.1 = load <4 x float>, <4 x float>* %9, align 4
> + %10 = fadd fast <4 x float> %wide.load.1, %4
> + %11 = fadd fast <4 x float> %wide.load10.1, %5
> + %index.next.1 = add nsw i64 %index, 16
> + %12 = getelementptr inbounds float, float* %a, i64 %index.next.1
> + %13 = bitcast float* %12 to <4 x float>*
> + %wide.load.2 = load <4 x float>, <4 x float>* %13, align 4
> + %14 = getelementptr float, float* %12, i64 4
> + %15 = bitcast float* %14 to <4 x float>*
> + %wide.load10.2 = load <4 x float>, <4 x float>* %15, align 4
> + %16 = fadd fast <4 x float> %wide.load.2, %10
> + %17 = fadd fast <4 x float> %wide.load10.2, %11
> + %index.next.2 = add nsw i64 %index, 24
> + %18 = getelementptr inbounds float, float* %a, i64 %index.next.2
> + %19 = bitcast float* %18 to <4 x float>*
> + %wide.load.3 = load <4 x float>, <4 x float>* %19, align 4
> + %20 = getelementptr float, float* %18, i64 4
> + %21 = bitcast float* %20 to <4 x float>*
> + %wide.load10.3 = load <4 x float>, <4 x float>* %21, align 4
> + %22 = fadd fast <4 x float> %wide.load.3, %16
> + %23 = fadd fast <4 x float> %wide.load10.3, %17
> + %index.next.3 = add nsw i64 %index, 32
> + %24 = getelementptr inbounds float, float* %a, i64 %index.next.3
> + %25 = bitcast float* %24 to <4 x float>*
> + %wide.load.4 = load <4 x float>, <4 x float>* %25, align 4
> + %26 = getelementptr float, float* %24, i64 4
> + %27 = bitcast float* %26 to <4 x float>*
> + %wide.load10.4 = load <4 x float>, <4 x float>* %27, align 4
> + %28 = fadd fast <4 x float> %wide.load.4, %22
> + %29 = fadd fast <4 x float> %wide.load10.4, %23
> + %index.next.4 = add nsw i64 %index, 40
> + %30 = icmp eq i64 %index.next.4, 1000
> + br i1 %30, label %middle.block, label %vector.body
> +
> +middle.block:
> + %.lcssa15 = phi <4 x float> [ %29, %vector.body ]
> + %.lcssa = phi <4 x float> [ %28, %vector.body ]
> + %bin.rdx = fadd fast <4 x float> %.lcssa15, %.lcssa
> + %rdx.shuf = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x
> i32> <i32 2, i32 3, i32 undef, i32 undef>
> + %bin.rdx11 = fadd fast <4 x float> %bin.rdx, %rdx.shuf
> + %rdx.shuf12 = shufflevector <4 x float> %bin.rdx11, <4 x float> undef,
> <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
> + %bin.rdx13 = fadd fast <4 x float> %bin.rdx11, %rdx.shuf12
> + %31 = extractelement <4 x float> %bin.rdx13, i32 0
> + ret float %31
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160217/5adc0248/attachment-0001.html>
More information about the llvm-commits
mailing list