[llvm-commits] [dragonegg] r131293 - in /dragonegg/trunk: include/dragonegg/Internals.h src/Convert.cpp
Duncan Sands
baldrick at free.fr
Fri May 13 08:04:04 PDT 2011
Author: baldrick
Date: Fri May 13 10:04:04 2011
New Revision: 131293
URL: http://llvm.org/viewvc/llvm-project?rev=131293&view=rev
Log:
Add support for REDUC_MAX_EXPR and REDUC_MIN_EXPR which may be formed
by the GCC tree vectorizer.
Modified:
dragonegg/trunk/include/dragonegg/Internals.h
dragonegg/trunk/src/Convert.cpp
Modified: dragonegg/trunk/include/dragonegg/Internals.h
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/include/dragonegg/Internals.h?rev=131293&r1=131292&r2=131293&view=diff
==============================================================================
--- dragonegg/trunk/include/dragonegg/Internals.h (original)
+++ dragonegg/trunk/include/dragonegg/Internals.h Fri May 13 10:04:04 2011
@@ -693,6 +693,8 @@
// Binary expressions.
Value *EmitReg_MinMaxExpr(tree_node *op0, tree_node *op1, unsigned UIPred,
unsigned SIPred, unsigned Opc);
+ Value *EmitReg_ReducMinMaxExpr(tree_node *op, unsigned UIPred,
+ unsigned SIPred, unsigned Opc);
Value *EmitReg_RotateOp(tree_node *type, tree_node *op0, tree_node *op1,
unsigned Opc1, unsigned Opc2);
Value *EmitReg_ShiftOp(tree_node *op0, tree_node *op1, unsigned Opc);
@@ -876,6 +878,18 @@
void StoreRegisterToMemory(Value *V, MemRef Loc, tree_node *type,
LLVMBuilder &Builder);
+ /// VectorHighElements - Return a vector of half the length, consisting of the
+ /// elements of the given vector with indices in the top half.
+ Value *VectorHighElements(Value *Vec);
+
+ /// VectorLowElements - Return a vector of half the length, consisting of the
+ /// elements of the given vector with indices in the bottom half.
+ Value *VectorLowElements(Value *Vec);
+
+ /// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
+ /// of the two pieces; repeat recursively on the result until scalar.
+ Value *ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred);
+
private:
// Optional target defined builtin intrinsic expanding function.
bool TargetIntrinsicLower(gimple_statement_d *stmt,
Modified: dragonegg/trunk/src/Convert.cpp
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/Convert.cpp?rev=131293&r1=131292&r2=131293&view=diff
==============================================================================
--- dragonegg/trunk/src/Convert.cpp (original)
+++ dragonegg/trunk/src/Convert.cpp Fri May 13 10:04:04 2011
@@ -6200,6 +6200,36 @@
SI->setAlignment(Loc.getAlignment());
}
+/// VectorHighElements - Given a vector with an even number of elements, return
+/// a vector of half the length holding its top-half elements in original order.
+Value *TreeToLLVM::VectorHighElements(Value *Vec) {
+ const VectorType *Ty = cast<VectorType>(Vec->getType());
+ assert(!(Ty->getNumElements() & 1) && "Vector has odd number of elements!");
+ unsigned NumElts = Ty->getNumElements() / 2; // Length of the result.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElts);
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Int32Ty, NumElts + i)); // Top-half index.
+ return Builder.CreateShuffleVector(Vec, UndefValue::get(Ty), // Mask stays in Vec.
+ ConstantVector::get(Mask));
+}
+
+/// VectorLowElements - Given a vector with an even number of elements, return
+/// a vector of half the length holding its bottom-half elements in original order.
+Value *TreeToLLVM::VectorLowElements(Value *Vec) {
+ const VectorType *Ty = cast<VectorType>(Vec->getType());
+ assert(!(Ty->getNumElements() & 1) && "Vector has odd number of elements!");
+ unsigned NumElts = Ty->getNumElements() / 2; // Length of the result.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElts);
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Int32Ty, i)); // Bottom-half index.
+ return Builder.CreateShuffleVector(Vec, UndefValue::get(Ty), // Mask stays in Vec.
+ ConstantVector::get(Mask));
+}
+
//===----------------------------------------------------------------------===//
// ... EmitReg* - Convert register expression to LLVM...
@@ -6458,6 +6488,58 @@
return Builder.CreateSelect(Compare, LHS, RHS);
}
+/// ReducMinMaxExprHelper - Split the given vector in two and form the max/min
+/// of the two pieces (as chosen by Pred); repeat recursively until scalar.
+Value *TreeToLLVM::ReducMinMaxExprHelper(Value *Op, CmpInst::Predicate Pred) {
+ const VectorType *Ty = cast<VectorType>(Op->getType());
+ unsigned NumElts = Ty->getNumElements();
+ assert(NumElts > 1 && !(NumElts & (NumElts - 1)) &&
+ "Number of vector elements is not a power of 2!");
+
+ if (NumElts == 2) {
+ // Base case: extract both elements and return the max/min of them as a scalar.
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ Value *LHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 0));
+ Value *RHS = Builder.CreateExtractElement(Op, ConstantInt::get(Int32Ty, 1));
+ Value *Compare = CmpInst::isFPPredicate(Pred) ? // FP vs integer compare.
+ Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
+ return Builder.CreateSelect(Compare, LHS, RHS); // LHS if Pred holds, else RHS.
+ }
+
+ // Recursively extract the high and low halves of the vector and take the
+ // max/min of them. Using vector select like this results in better code
+ // if the target supports it for at least one of the resulting vector sizes.
+ Value *LHS = VectorLowElements(Op), *RHS = VectorHighElements(Op);
+ Value *Compare = CmpInst::isFPPredicate(Pred) ? // FP vs integer compare.
+ Builder.CreateFCmp(Pred, LHS, RHS) : Builder.CreateICmp(Pred, LHS, RHS);
+ Op = Builder.CreateSelect(Compare, LHS, RHS); // Pick between the two halves.
+ return ReducMinMaxExprHelper(Op, Pred); // Reduce the selected half further.
+}
+
+Value *TreeToLLVM::EmitReg_ReducMinMaxExpr(tree op, unsigned UIPred,
+ unsigned SIPred, unsigned FPPred) {
+ // Use a divide and conquer scheme that results in the same code as the simple
+ // scalar implementation on targets that don't support vector select but gives
+ // better code if vector select is present.
+ // For example, reduc-max <float x0, float x1, float x2, float x3> becomes
+ // v = max <float x0, float x1>, <float x2, float x3> <- vector select
+ // m = max float v0, float v1 <- scalar select; v = <float v0, float v1>
+ // The final result, m, equals max(max(x0,x2),max(x1,x3)) = max(x0,x1,x2,x3).
+ Value *Val = EmitRegister(op); // The vector to reduce.
+ Value *Res; // Scalar max/min; predicate is chosen from op's type below.
+ if (FLOAT_TYPE_P(TREE_TYPE(op)))
+ Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(FPPred));
+ else if (TYPE_UNSIGNED(TREE_TYPE(op)))
+ Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(UIPred));
+ else
+ Res = ReducMinMaxExprHelper(Val, CmpInst::Predicate(SIPred));
+
+ // The result is a vector with the max/min as first element, the rest undef.
+ return Builder.CreateInsertElement(UndefValue::get(Val->getType()), Res,
+ ConstantInt::get(Type::getInt32Ty(Context),
+ 0));
+}
+
Value *TreeToLLVM::EmitReg_RotateOp(tree type, tree op0, tree op1,
unsigned Opc1, unsigned Opc2) {
Value *In = EmitRegister(op0);
@@ -7011,13 +7093,7 @@
Value *Op = EmitRegister(op0);
// Extract the high elements, eg: <4 x float> -> <2 x float>.
- unsigned Length = TYPE_VECTOR_SUBPARTS(type);
- SmallVector<Constant*, 16> Mask;
- Mask.reserve(Length);
- for (unsigned i = 0; i != Length; ++i)
- Mask.push_back(ConstantInt::get(Type::getInt32Ty(Context), Length + i));
- Op = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
- ConstantVector::get(Mask));
+ Op = VectorHighElements(Op);
// Extend the input elements to the output element type, eg: <2 x float>
// -> <2 x double>.
@@ -7034,13 +7110,7 @@
Value *Op = EmitRegister(op0);
// Extract the low elements, eg: <4 x float> -> <2 x float>.
- unsigned Length = TYPE_VECTOR_SUBPARTS(type);
- SmallVector<Constant*, 16> Mask;
- Mask.reserve(Length);
- for (unsigned i = 0; i != Length; ++i)
- Mask.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
- Op = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
- ConstantVector::get(Mask));
+ Op = VectorLowElements(Op);
// Extend the input elements to the output element type, eg: <2 x float>
// -> <2 x double>.
@@ -7989,6 +8059,14 @@
RHS = EmitReg_POINTER_PLUS_EXPR(rhs1, rhs2); break;
case RDIV_EXPR:
RHS = EmitReg_RDIV_EXPR(rhs1, rhs2); break;
+ case REDUC_MAX_EXPR:
+ RHS = EmitReg_ReducMinMaxExpr(rhs1, ICmpInst::ICMP_UGE, ICmpInst::ICMP_SGE,
+ FCmpInst::FCMP_OGE);
+ break;
+ case REDUC_MIN_EXPR:
+ RHS = EmitReg_ReducMinMaxExpr(rhs1, ICmpInst::ICMP_ULE, ICmpInst::ICMP_SLE,
+ FCmpInst::FCMP_OLE);
+ break;
case ROUND_DIV_EXPR:
RHS = EmitReg_ROUND_DIV_EXPR(rhs1, rhs2); break;
case RROTATE_EXPR:
More information about the llvm-commits
mailing list