[llvm] r302631 - Add a late IR expansion pass for the experimental reduction intrinsics.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Wed May 10 02:42:49 PDT 2017
Author: aemerson
Date: Wed May 10 04:42:49 2017
New Revision: 302631
URL: http://llvm.org/viewvc/llvm-project?rev=302631&view=rev
Log:
Add a late IR expansion pass for the experimental reduction intrinsics.
This pass uses a new target hook to decide whether to expand a particular
intrinsic into a shufflevector sequence.
Differential Revision: https://reviews.llvm.org/D32245
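
As context (not part of the patch itself): the default TargetTransformInfoImplBase
implementation of the new hook returns true, so every target gets the shuffle
expansion unless it opts out, while the AArch64 override in this patch returns
false unconditionally so the intrinsics survive to instruction selection. A
backend could also opt out selectively; a minimal sketch, using a hypothetical
free function rather than a real target's TTI class, might look like:

  // Sketch only -- MyTarget is hypothetical and not part of this commit.
  // Keep integer add reductions as intrinsics (assuming native lowering)
  // and let the ExpandReductions pass turn everything else into shuffles.
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Intrinsics.h"

  bool myTargetShouldExpandReduction(const llvm::IntrinsicInst *II) {
    switch (II->getIntrinsicID()) {
    case llvm::Intrinsic::experimental_vector_reduce_add:
      return false; // assumed to be lowered natively by the backend
    default:
      return true;  // expand into the shufflevector sequence
    }
  }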
Added:
llvm/trunk/include/llvm/CodeGen/ExpandReductions.h
llvm/trunk/lib/CodeGen/ExpandReductions.cpp
llvm/trunk/test/CodeGen/Generic/expand-experimental-reductions.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/include/llvm/CodeGen/Passes.h
llvm/trunk/include/llvm/InitializePasses.h
llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/CodeGen/CMakeLists.txt
llvm/trunk/lib/CodeGen/TargetPassConfig.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
llvm/trunk/tools/llc/llc.cpp
llvm/trunk/tools/opt/opt.cpp
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Wed May 10 04:42:49 2017
@@ -753,6 +753,9 @@ public:
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
+ /// \returns True if the target wants to expand the given reduction intrinsic
+ /// into a shuffle sequence.
+ bool shouldExpandReduction(const IntrinsicInst *II) const;
/// @}
private:
@@ -910,6 +913,7 @@ public:
VectorType *VecTy) const = 0;
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
+ virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
};
template <typename T>
@@ -1219,6 +1223,9 @@ public:
ReductionFlags Flags) const override {
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
}
+ bool shouldExpandReduction(const IntrinsicInst *II) const override {
+ return Impl.shouldExpandReduction(II);
+ }
};
template <typename T>
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Wed May 10 04:42:49 2017
@@ -462,6 +462,10 @@ public:
return false;
}
+ bool shouldExpandReduction(const IntrinsicInst *II) const {
+ return true;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Added: llvm/trunk/include/llvm/CodeGen/ExpandReductions.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ExpandReductions.h?rev=302631&view=auto
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ExpandReductions.h (added)
+++ llvm/trunk/include/llvm/CodeGen/ExpandReductions.h Wed May 10 04:42:49 2017
@@ -0,0 +1,24 @@
+//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXPANDREDUCTIONS_H
+#define LLVM_CODEGEN_EXPANDREDUCTIONS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ExpandReductionsPass
+ : public PassInfoMixin<ExpandReductionsPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_EXPANDREDUCTIONS_H
Modified: llvm/trunk/include/llvm/CodeGen/Passes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Passes.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/Passes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/Passes.h Wed May 10 04:42:49 2017
@@ -405,6 +405,10 @@ namespace llvm {
/// printing assembly.
ModulePass *createMachineOutlinerPass();
+ /// This pass expands the experimental reduction intrinsics into sequences of
+ /// shuffles.
+ FunctionPass *createExpandReductionsPass();
+
} // End llvm namespace
/// Target machine pass initializer for passes with dependencies. Use with
Modified: llvm/trunk/include/llvm/InitializePasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/include/llvm/InitializePasses.h (original)
+++ llvm/trunk/include/llvm/InitializePasses.h Wed May 10 04:42:49 2017
@@ -130,6 +130,7 @@ void initializeEfficiencySanitizerPass(P
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
+void initializeExpandReductionsPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Wed May 10 04:42:49 2017
@@ -491,6 +491,12 @@ bool canSinkOrHoistInst(Instruction &I,
LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Generates a vector reduction using shufflevectors to reduce the value.
+Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+ RecurrenceDescriptor::MinMaxRecurrenceKind
+ MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
+ ArrayRef<Value *> RedOps = ArrayRef<Value *>());
+
/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p Flags.
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Wed May 10 04:42:49 2017
@@ -505,6 +505,9 @@ bool TargetTransformInfo::useReductionIn
return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}
+bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
+ return TTIImpl->shouldExpandReduction(II);
+}
TargetTransformInfo::Concept::~Concept() {}
Modified: llvm/trunk/lib/CodeGen/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CMakeLists.txt?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CMakeLists.txt (original)
+++ llvm/trunk/lib/CodeGen/CMakeLists.txt Wed May 10 04:42:49 2017
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
+ ExpandReductions.cpp
FaultMaps.cpp
FEntryInserter.cpp
FuncletLayout.cpp
Added: llvm/trunk/lib/CodeGen/ExpandReductions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ExpandReductions.cpp?rev=302631&view=auto
==============================================================================
--- llvm/trunk/lib/CodeGen/ExpandReductions.cpp (added)
+++ llvm/trunk/lib/CodeGen/ExpandReductions.cpp Wed May 10 04:42:49 2017
@@ -0,0 +1,167 @@
+//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for reduction intrinsics, allowing targets
+// to enable the experimental intrinsics until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+unsigned getOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ return Instruction::FAdd;
+ case Intrinsic::experimental_vector_reduce_fmul:
+ return Instruction::FMul;
+ case Intrinsic::experimental_vector_reduce_add:
+ return Instruction::Add;
+ case Intrinsic::experimental_vector_reduce_mul:
+ return Instruction::Mul;
+ case Intrinsic::experimental_vector_reduce_and:
+ return Instruction::And;
+ case Intrinsic::experimental_vector_reduce_or:
+ return Instruction::Or;
+ case Intrinsic::experimental_vector_reduce_xor:
+ return Instruction::Xor;
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ return Instruction::ICmp;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unexpected ID");
+ }
+}
+
+RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_smax:
+ return RecurrenceDescriptor::MRK_SIntMax;
+ case Intrinsic::experimental_vector_reduce_smin:
+ return RecurrenceDescriptor::MRK_SIntMin;
+ case Intrinsic::experimental_vector_reduce_umax:
+ return RecurrenceDescriptor::MRK_UIntMax;
+ case Intrinsic::experimental_vector_reduce_umin:
+ return RecurrenceDescriptor::MRK_UIntMin;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ return RecurrenceDescriptor::MRK_FloatMax;
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return RecurrenceDescriptor::MRK_FloatMin;
+ default:
+ return RecurrenceDescriptor::MRK_Invalid;
+ }
+}
+
+bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
+ bool Changed = false;
+ SmallVector<IntrinsicInst*, 4> Worklist;
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (auto II = dyn_cast<IntrinsicInst>(&*I))
+ Worklist.push_back(II);
+
+ for (auto *II : Worklist) {
+ IRBuilder<> Builder(II);
+ Value *Vec = nullptr;
+ auto ID = II->getIntrinsicID();
+ auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_fmul:
+ // FMFs must be attached to the call, otherwise it's an ordered reduction
+ // and it can't be handled by generating this shuffle sequence.
+ // TODO: Implement scalarization of ordered reductions here for targets
+ // without native support.
+ if (!II->getFastMathFlags().unsafeAlgebra())
+ continue;
+ Vec = II->getArgOperand(1);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ Vec = II->getArgOperand(0);
+ MRK = getMRK(ID);
+ break;
+ default:
+ continue;
+ }
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+ auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ II->replaceAllUsesWith(Rdx);
+ II->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+}
+
+class ExpandReductions : public FunctionPass {
+public:
+ static char ID;
+ ExpandReductions() : FunctionPass(ID) {
+ initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return expandReductions(F, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char ExpandReductions::ID;
+INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+
+FunctionPass *llvm::createExpandReductionsPass() {
+ return new ExpandReductions();
+}
+
+PreservedAnalyses ExpandReductionsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!expandReductions(F, &TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
Modified: llvm/trunk/lib/CodeGen/TargetPassConfig.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetPassConfig.cpp?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetPassConfig.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetPassConfig.cpp Wed May 10 04:42:49 2017
@@ -487,6 +487,9 @@ void TargetPassConfig::addIRPasses() {
// Insert calls to mcount-like functions.
addPass(createCountingFunctionInserterPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ addPass(createExpandReductionsPass());
}
/// Turn exception handling constructs into something the code generators can
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Wed May 10 04:42:49 2017
@@ -137,6 +137,10 @@ public:
unsigned getMinPrefetchStride();
unsigned getMaxPrefetchIterationsAhead();
+
+ bool shouldExpandReduction(const IntrinsicInst *II) const {
+ return false;
+ }
/// @}
};
Modified: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp Wed May 10 04:42:49 2017
@@ -1125,11 +1125,10 @@ static Value *addFastMathFlag(Value *V)
}
// Helper to generate a log2 shuffle reduction.
-static Value *
-getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
- RecurrenceDescriptor::MRK_Invalid,
- ArrayRef<Value *> RedOps = ArrayRef<Value *>()) {
+Value *
+llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+ ArrayRef<Value *> RedOps) {
unsigned VF = Src->getType()->getVectorNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
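
The body of getShuffleReduction is unchanged by this patch and not shown in the
hunk above; for illustration only, a simplified sketch of the log2 shuffle
technique it refers to (binary-op case only, ignoring the min/max and fast-math
handling) could be written as:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Sketch: reduce a power-of-two-wide vector in log2(VF) rounds. Each round
  // shuffles the upper half of the live lanes down onto the lower half and
  // combines the two with the reduction binop; the scalar result ends up in
  // lane 0, matching the RDX_SHUF/BIN_RDX pattern in the test below.
  static Value *shuffleReduceSketch(IRBuilder<> &B, Value *Src,
                                    Instruction::BinaryOps Op) {
    unsigned VF = Src->getType()->getVectorNumElements();
    Value *Tmp = Src;
    for (unsigned Width = VF; Width != 1; Width >>= 1) {
      SmallVector<Constant *, 16> Mask(VF, UndefValue::get(B.getInt32Ty()));
      for (unsigned J = 0; J != Width / 2; ++J)
        Mask[J] = B.getInt32(Width / 2 + J); // lane J <- lane J + Width/2
      Value *Shuf =
          B.CreateShuffleVector(Tmp, UndefValue::get(Tmp->getType()),
                                ConstantVector::get(Mask), "rdx.shuf");
      Tmp = B.CreateBinOp(Op, Tmp, Shuf, "bin.rdx");
    }
    return B.CreateExtractElement(Tmp, B.getInt32(0));
  }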
Added: llvm/trunk/test/CodeGen/Generic/expand-experimental-reductions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/expand-experimental-reductions.ll?rev=302631&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Generic/expand-experimental-reductions.ll (added)
+++ llvm/trunk/test/CodeGen/Generic/expand-experimental-reductions.ll Wed May 10 04:42:49 2017
@@ -0,0 +1,210 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -expand-reductions -S | FileCheck %s
+; Tests without a target which should expand all reductions
+declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
+
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+
+declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
+
+declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
+
+
+define i64 @add_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @add_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @mul_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @mul_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @and_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @and_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @or_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @or_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @xor_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @xor_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define float @fadd_f32(<4 x float> %vec) {
+; CHECK-LABEL: @fadd_f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret float [[TMP0]]
+;
+entry:
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
+ ret float %r
+}
+
+define float @fadd_f32_strict(<4 x float> %vec) {
+; CHECK-LABEL: @fadd_f32_strict(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]])
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
+ ret float %r
+}
+
+define float @fmul_f32(<4 x float> %vec) {
+; CHECK-LABEL: @fmul_f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret float [[TMP0]]
+;
+entry:
+ %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
+ ret float %r
+}
+
+define i64 @smax_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @smax_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @smin_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @smin_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @umax_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @umax_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define i64 @umin_i64(<2 x i64> %vec) {
+; CHECK-LABEL: @umin_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
+ ret i64 %r
+}
+
+define double @fmax_f64(<2 x double> %vec) {
+; CHECK-LABEL: @fmax_f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret double [[TMP0]]
+;
+entry:
+ %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
+ ret double %r
+}
+
+define double @fmin_f64(<2 x double> %vec) {
+; CHECK-LABEL: @fmin_f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
+; CHECK-NEXT: ret double [[TMP0]]
+;
+entry:
+ %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
+ ret double %r
+}
Modified: llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O0-pipeline.ll?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/O0-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/O0-pipeline.ll Wed May 10 04:42:49 2017
@@ -23,6 +23,7 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Inserts calls to mcount-like functions
+; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Rewrite Symbols
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
Modified: llvm/trunk/tools/llc/llc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/tools/llc/llc.cpp (original)
+++ llvm/trunk/tools/llc/llc.cpp Wed May 10 04:42:49 2017
@@ -301,6 +301,7 @@ int main(int argc, char **argv) {
initializeConstantHoistingLegacyPassPass(*Registry);
initializeScalarOpts(*Registry);
initializeVectorization(*Registry);
+ initializeExpandReductionsPass(*Registry);
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
Modified: llvm/trunk/tools/opt/opt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/opt/opt.cpp?rev=302631&r1=302630&r2=302631&view=diff
==============================================================================
--- llvm/trunk/tools/opt/opt.cpp (original)
+++ llvm/trunk/tools/opt/opt.cpp Wed May 10 04:42:49 2017
@@ -397,6 +397,7 @@ int main(int argc, char **argv) {
initializeInterleavedAccessPass(Registry);
initializeCountingFunctionInserterPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
+ initializeExpandReductionsPass(Registry);
#ifdef LINK_POLLY_INTO_TOOLS
polly::initializePollyPasses(Registry);